diff options
Diffstat (limited to 'lib/CodeGen')
124 files changed, 17318 insertions, 9905 deletions
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 6d12581..8bc5ef9 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -18,16 +18,25 @@ #include "llvm/Module.h" #include "llvm/CodeGen/GCMetadataPrinter.h" #include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/DwarfWriter.h" #include "llvm/Analysis/DebugInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCSection.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FormattedStream.h" #include "llvm/Support/Mangler.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/ADT/SmallPtrSet.h" @@ -41,12 +50,17 @@ AsmVerbose("asm-verbose", cl::desc("Add comments to directives."), cl::init(cl::BOU_UNSET)); char AsmPrinter::ID = 0; -AsmPrinter::AsmPrinter(raw_ostream &o, TargetMachine &tm, - const TargetAsmInfo *T, bool VDef) +AsmPrinter::AsmPrinter(formatted_raw_ostream &o, TargetMachine &tm, + const MCAsmInfo *T, bool VDef) : MachineFunctionPass(&ID), FunctionNumber(0), O(o), - TM(tm), TAI(T), TRI(tm.getRegisterInfo()), - IsInTextSection(false), LastMI(0), LastFn(0), Counter(~0U), - PrevDLT(0, ~0U, ~0U) { + TM(tm), MAI(T), TRI(tm.getRegisterInfo()), + + OutContext(*new MCContext()), + // FIXME: Pass instprinter to streamer. 
+ OutStreamer(*createAsmStreamer(OutContext, O, *T, 0)), + + LastMI(0), LastFn(0), Counter(~0U), + PrevDLT(0, 0, ~0U, ~0U) { DW = 0; MMI = 0; switch (AsmVerbose) { case cl::BOU_UNSET: VerboseAsm = VDef; break; @@ -59,188 +73,124 @@ AsmPrinter::~AsmPrinter() { for (gcp_iterator I = GCMetadataPrinters.begin(), E = GCMetadataPrinters.end(); I != E; ++I) delete I->second; -} - -/// SwitchToTextSection - Switch to the specified text section of the executable -/// if we are not already in it! -/// -void AsmPrinter::SwitchToTextSection(const char *NewSection, - const GlobalValue *GV) { - std::string NS; - if (GV && GV->hasSection()) - NS = TAI->getSwitchToSectionDirective() + GV->getSection(); - else - NS = NewSection; - // If we're already in this section, we're done. - if (CurrentSection == NS) return; - - // Close the current section, if applicable. - if (TAI->getSectionEndDirectiveSuffix() && !CurrentSection.empty()) - O << CurrentSection << TAI->getSectionEndDirectiveSuffix() << '\n'; - - CurrentSection = NS; - - if (!CurrentSection.empty()) - O << CurrentSection << TAI->getTextSectionStartSuffix() << '\n'; - - IsInTextSection = true; + delete &OutStreamer; + delete &OutContext; } -/// SwitchToDataSection - Switch to the specified data section of the executable -/// if we are not already in it! -/// -void AsmPrinter::SwitchToDataSection(const char *NewSection, - const GlobalValue *GV) { - std::string NS; - if (GV && GV->hasSection()) - NS = TAI->getSwitchToSectionDirective() + GV->getSection(); - else - NS = NewSection; - - // If we're already in this section, we're done. - if (CurrentSection == NS) return; - - // Close the current section, if applicable. 
- if (TAI->getSectionEndDirectiveSuffix() && !CurrentSection.empty()) - O << CurrentSection << TAI->getSectionEndDirectiveSuffix() << '\n'; - - CurrentSection = NS; - - if (!CurrentSection.empty()) - O << CurrentSection << TAI->getDataSectionStartSuffix() << '\n'; - - IsInTextSection = false; +TargetLoweringObjectFile &AsmPrinter::getObjFileLowering() const { + return TM.getTargetLowering()->getObjFileLowering(); } -/// SwitchToSection - Switch to the specified section of the executable if we -/// are not already in it! -void AsmPrinter::SwitchToSection(const Section* NS) { - const std::string& NewSection = NS->getName(); - - // If we're already in this section, we're done. - if (CurrentSection == NewSection) return; - - // Close the current section, if applicable. - if (TAI->getSectionEndDirectiveSuffix() && !CurrentSection.empty()) - O << CurrentSection << TAI->getSectionEndDirectiveSuffix() << '\n'; - - // FIXME: Make CurrentSection a Section* in the future - CurrentSection = NewSection; - CurrentSection_ = NS; - - if (!CurrentSection.empty()) { - // If section is named we need to switch into it via special '.section' - // directive and also append funky flags. Otherwise - section name is just - // some magic assembler directive. - if (NS->isNamed()) - O << TAI->getSwitchToSectionDirective() - << CurrentSection - << TAI->getSectionFlags(NS->getFlags()); - else - O << CurrentSection; - O << TAI->getDataSectionStartSuffix() << '\n'; - } - - IsInTextSection = (NS->getFlags() & SectionFlags::Code); +/// getCurrentSection() - Return the current section we are emitting to. 
+const MCSection *AsmPrinter::getCurrentSection() const { + return OutStreamer.getCurrentSection(); } + void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); MachineFunctionPass::getAnalysisUsage(AU); AU.addRequired<GCModuleInfo>(); + if (VerboseAsm) + AU.addRequired<MachineLoopInfo>(); } bool AsmPrinter::doInitialization(Module &M) { - Mang = new Mangler(M, TAI->getGlobalPrefix(), TAI->getPrivateGlobalPrefix()); + // Initialize TargetLoweringObjectFile. + const_cast<TargetLoweringObjectFile&>(getObjFileLowering()) + .Initialize(OutContext, TM); - if (TAI->doesAllowQuotesInName()) + Mang = new Mangler(M, MAI->getGlobalPrefix(), MAI->getPrivateGlobalPrefix(), + MAI->getLinkerPrivateGlobalPrefix()); + + if (MAI->doesAllowQuotesInName()) Mang->setUseQuotes(true); + + if (MAI->doesAllowNameToStartWithDigit()) + Mang->setSymbolsCanStartWithDigit(true); - GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>(); - assert(MI && "AsmPrinter didn't require GCModuleInfo?"); + // Allow the target to emit any magic that it wants at the start of the file. + EmitStartOfAsmFile(M); - if (TAI->hasSingleParameterDotFile()) { + if (MAI->hasSingleParameterDotFile()) { /* Very minimal debug info. It is ignored if we emit actual - debug info. If we don't, this at helps the user find where + debug info. If we don't, this at least helps the user find where a function came from. 
*/ O << "\t.file\t\"" << M.getModuleIdentifier() << "\"\n"; } + GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>(); + assert(MI && "AsmPrinter didn't require GCModuleInfo?"); for (GCModuleInfo::iterator I = MI->begin(), E = MI->end(); I != E; ++I) if (GCMetadataPrinter *MP = GetOrCreateGCPrinter(*I)) - MP->beginAssembly(O, *this, *TAI); + MP->beginAssembly(O, *this, *MAI); if (!M.getModuleInlineAsm().empty()) - O << TAI->getCommentString() << " Start of file scope inline assembly\n" + O << MAI->getCommentString() << " Start of file scope inline assembly\n" << M.getModuleInlineAsm() - << '\n' << TAI->getCommentString() + << '\n' << MAI->getCommentString() << " End of file scope inline assembly\n"; - SwitchToDataSection(""); // Reset back to no section. - - if (TAI->doesSupportDebugInformation() || - TAI->doesSupportExceptionHandling()) { - MMI = getAnalysisIfAvailable<MachineModuleInfo>(); - if (MMI) - MMI->AnalyzeModule(M); - DW = getAnalysisIfAvailable<DwarfWriter>(); - if (DW) - DW->BeginModule(&M, MMI, O, this, TAI); - } + MMI = getAnalysisIfAvailable<MachineModuleInfo>(); + if (MMI) + MMI->AnalyzeModule(M); + DW = getAnalysisIfAvailable<DwarfWriter>(); + if (DW) + DW->BeginModule(&M, MMI, O, this, MAI); return false; } bool AsmPrinter::doFinalization(Module &M) { + // Emit global variables. + for (Module::const_global_iterator I = M.global_begin(), E = M.global_end(); + I != E; ++I) + PrintGlobalVariable(I); + // Emit final debug information. - if (TAI->doesSupportDebugInformation() || TAI->doesSupportExceptionHandling()) + if (MAI->doesSupportDebugInformation() || MAI->doesSupportExceptionHandling()) DW->EndModule(); // If the target wants to know about weak references, print them all. - if (TAI->getWeakRefDirective()) { + if (MAI->getWeakRefDirective()) { // FIXME: This is not lazy, it would be nice to only print weak references // to stuff that is actually used. 
Note that doing so would require targets // to notice uses in operands (due to constant exprs etc). This should // happen with the MC stuff eventually. - SwitchToDataSection(""); // Print out module-level global variables here. for (Module::const_global_iterator I = M.global_begin(), E = M.global_end(); I != E; ++I) { if (I->hasExternalWeakLinkage()) - O << TAI->getWeakRefDirective() << Mang->getValueName(I) << '\n'; + O << MAI->getWeakRefDirective() << Mang->getMangledName(I) << '\n'; } - for (Module::const_iterator I = M.begin(), E = M.end(); - I != E; ++I) { + for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I) { if (I->hasExternalWeakLinkage()) - O << TAI->getWeakRefDirective() << Mang->getValueName(I) << '\n'; + O << MAI->getWeakRefDirective() << Mang->getMangledName(I) << '\n'; } } - if (TAI->getSetDirective()) { - if (!M.alias_empty()) - SwitchToSection(TAI->getTextSection()); - + if (MAI->getSetDirective()) { O << '\n'; for (Module::const_alias_iterator I = M.alias_begin(), E = M.alias_end(); I != E; ++I) { - std::string Name = Mang->getValueName(I); - std::string Target; + std::string Name = Mang->getMangledName(I); const GlobalValue *GV = cast<GlobalValue>(I->getAliasedGlobal()); - Target = Mang->getValueName(GV); + std::string Target = Mang->getMangledName(GV); - if (I->hasExternalLinkage() || !TAI->getWeakRefDirective()) + if (I->hasExternalLinkage() || !MAI->getWeakRefDirective()) O << "\t.globl\t" << Name << '\n'; else if (I->hasWeakLinkage()) - O << TAI->getWeakRefDirective() << Name << '\n'; + O << MAI->getWeakRefDirective() << Name << '\n'; else if (!I->hasLocalLinkage()) - assert(0 && "Invalid alias linkage"); + llvm_unreachable("Invalid alias linkage"); printVisibility(Name, I->getVisibility()); - O << TAI->getSetDirective() << ' ' << Name << ", " << Target << '\n'; + O << MAI->getSetDirective() << ' ' << Name << ", " << Target << '\n'; } } @@ -248,45 +198,43 @@ bool AsmPrinter::doFinalization(Module &M) { assert(MI && 
"AsmPrinter didn't require GCModuleInfo?"); for (GCModuleInfo::iterator I = MI->end(), E = MI->begin(); I != E; ) if (GCMetadataPrinter *MP = GetOrCreateGCPrinter(*--I)) - MP->finishAssembly(O, *this, *TAI); + MP->finishAssembly(O, *this, *MAI); // If we don't have any trampolines, then we don't require stack memory // to be executable. Some targets have a directive to declare this. Function *InitTrampolineIntrinsic = M.getFunction("llvm.init.trampoline"); if (!InitTrampolineIntrinsic || InitTrampolineIntrinsic->use_empty()) - if (TAI->getNonexecutableStackDirective()) - O << TAI->getNonexecutableStackDirective() << '\n'; + if (MAI->getNonexecutableStackDirective()) + O << MAI->getNonexecutableStackDirective() << '\n'; + + // Allow the target to emit any magic that it wants at the end of the file, + // after everything else has gone out. + EmitEndOfAsmFile(M); + delete Mang; Mang = 0; DW = 0; MMI = 0; + + OutStreamer.Finish(); return false; } -const std::string & -AsmPrinter::getCurrentFunctionEHName(const MachineFunction *MF, - std::string &Name) const { - assert(MF && "No machine function?"); - Name = MF->getFunction()->getName(); - if (Name.empty()) - Name = Mang->getValueName(MF->getFunction()); - Name = Mang->makeNameProper(TAI->getEHGlobalPrefix() + - Name + ".eh", TAI->getGlobalPrefix()); - return Name; -} - void AsmPrinter::SetupMachineFunction(MachineFunction &MF) { // What's my mangled name? - CurrentFnName = Mang->getValueName(MF.getFunction()); + CurrentFnName = Mang->getMangledName(MF.getFunction()); IncrementFunctionNumber(); + + if (VerboseAsm) + LI = &getAnalysis<MachineLoopInfo>(); } namespace { // SectionCPs - Keep track the alignment, constpool entries per Section. 
struct SectionCPs { - const Section *S; + const MCSection *S; unsigned Alignment; SmallVector<unsigned, 4> CPEs; - SectionCPs(const Section *s, unsigned a) : S(s), Alignment(a) {}; + SectionCPs(const MCSection *s, unsigned a) : S(s), Alignment(a) {}; }; } @@ -303,9 +251,27 @@ void AsmPrinter::EmitConstantPool(MachineConstantPool *MCP) { // the same section together to reduce amount of section switch statements. SmallVector<SectionCPs, 4> CPSections; for (unsigned i = 0, e = CP.size(); i != e; ++i) { - MachineConstantPoolEntry CPE = CP[i]; + const MachineConstantPoolEntry &CPE = CP[i]; unsigned Align = CPE.getAlignment(); - const Section* S = TAI->SelectSectionForMachineConst(CPE.getType()); + + SectionKind Kind; + switch (CPE.getRelocationInfo()) { + default: llvm_unreachable("Unknown section kind"); + case 2: Kind = SectionKind::getReadOnlyWithRel(); break; + case 1: + Kind = SectionKind::getReadOnlyWithRelLocal(); + break; + case 0: + switch (TM.getTargetData()->getTypeAllocSize(CPE.getType())) { + case 4: Kind = SectionKind::getMergeableConst4(); break; + case 8: Kind = SectionKind::getMergeableConst8(); break; + case 16: Kind = SectionKind::getMergeableConst16();break; + default: Kind = SectionKind::getMergeableConst(); break; + } + } + + const MCSection *S = getObjFileLowering().getSectionForConstant(Kind); + // The number of sections are small, just do a linear search from the // last section to the first. bool Found = false; @@ -328,7 +294,7 @@ void AsmPrinter::EmitConstantPool(MachineConstantPool *MCP) { // Now print stuff into the calculated sections. 
for (unsigned i = 0, e = CPSections.size(); i != e; ++i) { - SwitchToSection(CPSections[i].S); + OutStreamer.SwitchSection(CPSections[i].S); EmitAlignment(Log2_32(CPSections[i].Alignment)); unsigned Offset = 0; @@ -344,11 +310,12 @@ void AsmPrinter::EmitConstantPool(MachineConstantPool *MCP) { const Type *Ty = CPE.getType(); Offset = NewOffset + TM.getTargetData()->getTypeAllocSize(Ty); - O << TAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << '_' - << CPI << ":\t\t\t\t\t"; + O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << '_' + << CPI << ':'; if (VerboseAsm) { - O << TAI->getCommentString() << ' '; - WriteTypeSymbolic(O, CPE.getType(), 0); + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() << " constant "; + WriteTypeSymbolic(O, CPE.getType(), MF->getFunction()->getParent()); } O << '\n'; if (CPE.isMachineConstantPoolEntry()) @@ -373,20 +340,21 @@ void AsmPrinter::EmitJumpTableInfo(MachineJumpTableInfo *MJTI, // the appropriate section. TargetLowering *LoweringInfo = TM.getTargetLowering(); - const char* JumpTableDataSection = TAI->getJumpTableDataSection(); const Function *F = MF.getFunction(); - unsigned SectionFlags = TAI->SectionFlagsForGlobal(F); bool JTInDiffSection = false; - if ((IsPic && !(LoweringInfo && LoweringInfo->usesGlobalOffsetTable())) || - !JumpTableDataSection || - SectionFlags & SectionFlags::Linkonce) { + if (F->isWeakForLinker() || + (IsPic && !LoweringInfo->usesGlobalOffsetTable())) { // In PIC mode, we need to emit the jump table to the same section as the // function body itself, otherwise the label differences won't make sense. // We should also do if the section name is NULL or function is declared in // discardable section. - SwitchToSection(TAI->SectionForGlobal(F)); + OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F, Mang, + TM)); } else { - SwitchToDataSection(JumpTableDataSection); + // Otherwise, drop it in the readonly section. 
+ const MCSection *ReadOnlySection = + getObjFileLowering().getSectionForConstant(SectionKind::getReadOnly()); + OutStreamer.SwitchSection(ReadOnlySection); JTInDiffSection = true; } @@ -402,21 +370,21 @@ void AsmPrinter::EmitJumpTableInfo(MachineJumpTableInfo *MJTI, // the number of relocations the assembler will generate for the jump table. // Set directives are all printed before the jump table itself. SmallPtrSet<MachineBasicBlock*, 16> EmittedSets; - if (TAI->getSetDirective() && IsPic) + if (MAI->getSetDirective() && IsPic) for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii) if (EmittedSets.insert(JTBBs[ii])) printPICJumpTableSetLabel(i, JTBBs[ii]); - // On some targets (e.g. darwin) we want to emit two consequtive labels + // On some targets (e.g. Darwin) we want to emit two consequtive labels // before each jump table. The first label is never referenced, but tells // the assembler and linker the extents of the jump table object. The // second label is actually referenced by the code. - if (JTInDiffSection) { - if (const char *JTLabelPrefix = TAI->getJumpTableSpecialLabelPrefix()) - O << JTLabelPrefix << "JTI" << getFunctionNumber() << '_' << i << ":\n"; + if (JTInDiffSection && MAI->getLinkerPrivateGlobalPrefix()[0]) { + O << MAI->getLinkerPrivateGlobalPrefix() + << "JTI" << getFunctionNumber() << '_' << i << ":\n"; } - O << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() + O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() << '_' << i << ":\n"; for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii) { @@ -429,15 +397,15 @@ void AsmPrinter::EmitJumpTableInfo(MachineJumpTableInfo *MJTI, void AsmPrinter::printPICJumpTableEntry(const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB, unsigned uid) const { - bool IsPic = TM.getRelocationModel() == Reloc::PIC_; + bool isPIC = TM.getRelocationModel() == Reloc::PIC_; // Use JumpTableDirective otherwise honor the entry size from the jump table // info. 
- const char *JTEntryDirective = TAI->getJumpTableDirective(); + const char *JTEntryDirective = MAI->getJumpTableDirective(isPIC); bool HadJTEntryDirective = JTEntryDirective != NULL; if (!HadJTEntryDirective) { JTEntryDirective = MJTI->getEntrySize() == 4 ? - TAI->getData32bitsDirective() : TAI->getData64bitsDirective(); + MAI->getData32bitsDirective() : MAI->getData64bitsDirective(); } O << JTEntryDirective << ' '; @@ -447,20 +415,18 @@ void AsmPrinter::printPICJumpTableEntry(const MachineJumpTableInfo *MJTI, // emit the table entries as differences between two text section labels. // If we're emitting non-PIC code, then emit the entries as direct // references to the target basic blocks. - if (IsPic) { - if (TAI->getSetDirective()) { - O << TAI->getPrivateGlobalPrefix() << getFunctionNumber() - << '_' << uid << "_set_" << MBB->getNumber(); - } else { - printBasicBlockLabel(MBB, false, false, false); - // If the arch uses custom Jump Table directives, don't calc relative to - // JT - if (!HadJTEntryDirective) - O << '-' << TAI->getPrivateGlobalPrefix() << "JTI" - << getFunctionNumber() << '_' << uid; - } + if (!isPIC) { + GetMBBSymbol(MBB->getNumber())->print(O, MAI); + } else if (MAI->getSetDirective()) { + O << MAI->getPrivateGlobalPrefix() << getFunctionNumber() + << '_' << uid << "_set_" << MBB->getNumber(); } else { - printBasicBlockLabel(MBB, false, false, false); + GetMBBSymbol(MBB->getNumber())->print(O, MAI); + // If the arch uses custom Jump Table directives, don't calc relative to + // JT + if (!HadJTEntryDirective) + O << '-' << MAI->getPrivateGlobalPrefix() << "JTI" + << getFunctionNumber() << '_' << uid; } } @@ -470,12 +436,12 @@ void AsmPrinter::printPICJumpTableEntry(const MachineJumpTableInfo *MJTI, /// do nothing and return false. bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) { if (GV->getName() == "llvm.used") { - if (TAI->getUsedDirective() != 0) // No need to emit this at all. 
+ if (MAI->getUsedDirective() != 0) // No need to emit this at all. EmitLLVMUsedList(GV->getInitializer()); return true; } - // Ignore debug and non-emitted data. + // Ignore debug and non-emitted data. This handles llvm.compiler.used. if (GV->getSection() == "llvm.metadata" || GV->hasAvailableExternallyLinkage()) return true; @@ -487,14 +453,14 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) { const TargetData *TD = TM.getTargetData(); unsigned Align = Log2_32(TD->getPointerPrefAlignment()); if (GV->getName() == "llvm.global_ctors") { - SwitchToDataSection(TAI->getStaticCtorsSection()); + OutStreamer.SwitchSection(getObjFileLowering().getStaticCtorSection()); EmitAlignment(Align, 0); EmitXXStructorList(GV->getInitializer()); return true; } if (GV->getName() == "llvm.global_dtors") { - SwitchToDataSection(TAI->getStaticDtorsSection()); + OutStreamer.SwitchSection(getObjFileLowering().getStaticDtorSection()); EmitAlignment(Align, 0); EmitXXStructorList(GV->getInitializer()); return true; @@ -503,45 +469,20 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) { return false; } -/// findGlobalValue - if CV is an expression equivalent to a single -/// global value, return that value. 
-const GlobalValue * AsmPrinter::findGlobalValue(const Constant *CV) { - if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) - return GV; - else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) { - const TargetData *TD = TM.getTargetData(); - unsigned Opcode = CE->getOpcode(); - switch (Opcode) { - case Instruction::GetElementPtr: { - const Constant *ptrVal = CE->getOperand(0); - SmallVector<Value*, 8> idxVec(CE->op_begin()+1, CE->op_end()); - if (TD->getIndexedOffset(ptrVal->getType(), &idxVec[0], idxVec.size())) - return 0; - return findGlobalValue(ptrVal); - } - case Instruction::BitCast: - return findGlobalValue(CE->getOperand(0)); - default: - return 0; - } - } - return 0; -} - -/// EmitLLVMUsedList - For targets that define a TAI::UsedDirective, mark each +/// EmitLLVMUsedList - For targets that define a MAI::UsedDirective, mark each /// global in the specified llvm.used list for which emitUsedDirectiveFor /// is true, as being used with this directive. - void AsmPrinter::EmitLLVMUsedList(Constant *List) { - const char *Directive = TAI->getUsedDirective(); + const char *Directive = MAI->getUsedDirective(); // Should be an array of 'i8*'. ConstantArray *InitList = dyn_cast<ConstantArray>(List); if (InitList == 0) return; for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) { - const GlobalValue *GV = findGlobalValue(InitList->getOperand(i)); - if (TAI->emitUsedDirectiveFor(GV, Mang)) { + const GlobalValue *GV = + dyn_cast<GlobalValue>(InitList->getOperand(i)->stripPointerCasts()); + if (GV && getObjFileLowering().shouldEmitUsedDirectiveFor(GV, Mang)) { O << Directive; EmitConstantValueOnly(InitList->getOperand(i)); O << '\n'; @@ -567,32 +508,6 @@ void AsmPrinter::EmitXXStructorList(Constant *List) { } } -/// getGlobalLinkName - Returns the asm/link name of of the specified -/// global variable. Should be overridden by each target asm printer to -/// generate the appropriate value. 
-const std::string &AsmPrinter::getGlobalLinkName(const GlobalVariable *GV, - std::string &LinkName) const { - if (isa<Function>(GV)) { - LinkName += TAI->getFunctionAddrPrefix(); - LinkName += Mang->getValueName(GV); - LinkName += TAI->getFunctionAddrSuffix(); - } else { - LinkName += TAI->getGlobalVarAddrPrefix(); - LinkName += Mang->getValueName(GV); - LinkName += TAI->getGlobalVarAddrSuffix(); - } - - return LinkName; -} - -/// EmitExternalGlobal - Emit the external reference to a global variable. -/// Should be overridden if an indirect reference should be used. -void AsmPrinter::EmitExternalGlobal(const GlobalVariable *GV) { - std::string GLN; - O << getGlobalLinkName(GV, GLN); -} - - //===----------------------------------------------------------------------===// /// LEB 128 number encoding. @@ -646,8 +561,8 @@ void AsmPrinter::EOL() const { void AsmPrinter::EOL(const std::string &Comment) const { if (VerboseAsm && !Comment.empty()) { - O << '\t' - << TAI->getCommentString() + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() << ' ' << Comment; } @@ -656,22 +571,72 @@ void AsmPrinter::EOL(const std::string &Comment) const { void AsmPrinter::EOL(const char* Comment) const { if (VerboseAsm && *Comment) { - O << '\t' - << TAI->getCommentString() + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() << ' ' << Comment; } O << '\n'; } +static const char *DecodeDWARFEncoding(unsigned Encoding) { + switch (Encoding) { + case dwarf::DW_EH_PE_absptr: + return "absptr"; + case dwarf::DW_EH_PE_omit: + return "omit"; + case dwarf::DW_EH_PE_pcrel: + return "pcrel"; + case dwarf::DW_EH_PE_udata4: + return "udata4"; + case dwarf::DW_EH_PE_udata8: + return "udata8"; + case dwarf::DW_EH_PE_sdata4: + return "sdata4"; + case dwarf::DW_EH_PE_sdata8: + return "sdata8"; + case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata4: + return "pcrel udata4"; + case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4: + return "pcrel sdata4"; + case 
dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata8: + return "pcrel udata8"; + case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata8: + return "pcrel sdata8"; + case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_udata4: + return "indirect pcrel udata4"; + case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_sdata4: + return "indirect pcrel sdata4"; + case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_udata8: + return "indirect pcrel udata8"; + case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_sdata8: + return "indirect pcrel sdata8"; + } + + return 0; +} + +void AsmPrinter::EOL(const char *Comment, unsigned Encoding) const { + if (VerboseAsm && *Comment) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() + << ' ' + << Comment; + + if (const char *EncStr = DecodeDWARFEncoding(Encoding)) + O << " (" << EncStr << ')'; + } + O << '\n'; +} + /// EmitULEB128Bytes - Emit an assembler byte data directive to compose an /// unsigned leb128 value. void AsmPrinter::EmitULEB128Bytes(unsigned Value) const { - if (TAI->hasLEB128()) { + if (MAI->hasLEB128()) { O << "\t.uleb128\t" << Value; } else { - O << TAI->getData8bitsDirective(); + O << MAI->getData8bitsDirective(); PrintULEB128(Value); } } @@ -679,11 +644,11 @@ void AsmPrinter::EmitULEB128Bytes(unsigned Value) const { /// EmitSLEB128Bytes - print an assembler byte data directive to compose a /// signed leb128 value. void AsmPrinter::EmitSLEB128Bytes(int Value) const { - if (TAI->hasLEB128()) { + if (MAI->hasLEB128()) { O << "\t.sleb128\t" << Value; } else { - O << TAI->getData8bitsDirective(); + O << MAI->getData8bitsDirective(); PrintSLEB128(Value); } } @@ -691,29 +656,29 @@ void AsmPrinter::EmitSLEB128Bytes(int Value) const { /// EmitInt8 - Emit a byte directive and value. 
/// void AsmPrinter::EmitInt8(int Value) const { - O << TAI->getData8bitsDirective(); + O << MAI->getData8bitsDirective(); PrintHex(Value & 0xFF); } /// EmitInt16 - Emit a short directive and value. /// void AsmPrinter::EmitInt16(int Value) const { - O << TAI->getData16bitsDirective(); + O << MAI->getData16bitsDirective(); PrintHex(Value & 0xFFFF); } /// EmitInt32 - Emit a long directive and value. /// void AsmPrinter::EmitInt32(int Value) const { - O << TAI->getData32bitsDirective(); + O << MAI->getData32bitsDirective(); PrintHex(Value); } /// EmitInt64 - Emit a long long directive and value. /// void AsmPrinter::EmitInt64(uint64_t Value) const { - if (TAI->getData64bitsDirective()) { - O << TAI->getData64bitsDirective(); + if (MAI->getData64bitsDirective()) { + O << MAI->getData64bitsDirective(); PrintHex(Value); } else { if (TM.getTargetData()->isBigEndian()) { @@ -734,7 +699,7 @@ static inline char toOctal(int X) { /// printStringChar - Print a char, escaped if necessary. /// -static void printStringChar(raw_ostream &O, unsigned char C) { +static void printStringChar(formatted_raw_ostream &O, unsigned char C) { if (C == '"') { O << "\\\""; } else if (C == '\\') { @@ -766,11 +731,11 @@ void AsmPrinter::EmitString(const std::string &String) const { } void AsmPrinter::EmitString(const char *String, unsigned Size) const { - const char* AscizDirective = TAI->getAscizDirective(); + const char* AscizDirective = MAI->getAscizDirective(); if (AscizDirective) O << AscizDirective; else - O << TAI->getAsciiDirective(); + O << MAI->getAsciiDirective(); O << '\"'; for (unsigned i = 0; i < Size; ++i) printStringChar(O, String[i]); @@ -813,31 +778,26 @@ void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalValue *GV, NumBits = std::max(NumBits, ForcedAlignBits); if (NumBits == 0) return; // No need to emit alignment. 
- if (TAI->getAlignmentIsInBytes()) NumBits = 1 << NumBits; - O << TAI->getAlignDirective() << NumBits; - - unsigned FillValue = TAI->getTextAlignFillValue(); - UseFillExpr &= IsInTextSection && FillValue; - if (UseFillExpr) { - O << ','; - PrintHex(FillValue); - } - O << '\n'; + + unsigned FillValue = 0; + if (getCurrentSection()->getKind().isText()) + FillValue = MAI->getTextAlignFillValue(); + + OutStreamer.EmitValueToAlignment(1 << NumBits, FillValue, 1, 0); } - /// EmitZeros - Emit a block of zeros. /// void AsmPrinter::EmitZeros(uint64_t NumZeros, unsigned AddrSpace) const { if (NumZeros) { - if (TAI->getZeroDirective()) { - O << TAI->getZeroDirective() << NumZeros; - if (TAI->getZeroDirectiveSuffix()) - O << TAI->getZeroDirectiveSuffix(); + if (MAI->getZeroDirective()) { + O << MAI->getZeroDirective() << NumZeros; + if (MAI->getZeroDirectiveSuffix()) + O << MAI->getZeroDirectiveSuffix(); O << '\n'; } else { for (; NumZeros; --NumZeros) - O << TAI->getData8bitsDirective(AddrSpace) << "0\n"; + O << MAI->getData8bitsDirective(AddrSpace) << "0\n"; } } } @@ -851,22 +811,22 @@ void AsmPrinter::EmitConstantValueOnly(const Constant *CV) { O << CI->getZExtValue(); } else if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) { // This is a constant address for a global variable or function. Use the - // name of the variable or function as the address value, possibly - // decorating it with GlobalVarAddrPrefix/Suffix or - // FunctionAddrPrefix/Suffix (these all default to "" ) - if (isa<Function>(GV)) { - O << TAI->getFunctionAddrPrefix() - << Mang->getValueName(GV) - << TAI->getFunctionAddrSuffix(); - } else { - O << TAI->getGlobalVarAddrPrefix() - << Mang->getValueName(GV) - << TAI->getGlobalVarAddrSuffix(); - } + // name of the variable or function as the address value. 
+ O << Mang->getMangledName(GV); } else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) { const TargetData *TD = TM.getTargetData(); unsigned Opcode = CE->getOpcode(); switch (Opcode) { + case Instruction::Trunc: + case Instruction::ZExt: + case Instruction::SExt: + case Instruction::FPTrunc: + case Instruction::FPExt: + case Instruction::UIToFP: + case Instruction::SIToFP: + case Instruction::FPToUI: + case Instruction::FPToSI: + llvm_unreachable("FIXME: Don't support this constant cast expr"); case Instruction::GetElementPtr: { // generate a symbolic expression for the byte address const Constant *ptrVal = CE->getOperand(0); @@ -891,17 +851,6 @@ void AsmPrinter::EmitConstantValueOnly(const Constant *CV) { } break; } - case Instruction::Trunc: - case Instruction::ZExt: - case Instruction::SExt: - case Instruction::FPTrunc: - case Instruction::FPExt: - case Instruction::UIToFP: - case Instruction::SIToFP: - case Instruction::FPToUI: - case Instruction::FPToSI: - assert(0 && "FIXME: Don't yet support this kind of constant cast expr"); - break; case Instruction::BitCast: return EmitConstantValueOnly(CE->getOperand(0)); @@ -909,7 +858,8 @@ void AsmPrinter::EmitConstantValueOnly(const Constant *CV) { // Handle casts to pointers by changing them into casts to the appropriate // integer type. This promotes constant folding and simplifies this code. Constant *Op = CE->getOperand(0); - Op = ConstantExpr::getIntegerCast(Op, TD->getIntPtrType(), false/*ZExt*/); + Op = ConstantExpr::getIntegerCast(Op, TD->getIntPtrType(CV->getContext()), + false/*ZExt*/); return EmitConstantValueOnly(Op); } @@ -922,16 +872,17 @@ void AsmPrinter::EmitConstantValueOnly(const Constant *CV) { // We can emit the pointer value into this slot if the slot is an // integer slot greater or equal to the size of the pointer. 
- if (TD->getTypeAllocSize(Ty) >= TD->getTypeAllocSize(Op->getType())) + if (TD->getTypeAllocSize(Ty) == TD->getTypeAllocSize(Op->getType())) return EmitConstantValueOnly(Op); O << "(("; EmitConstantValueOnly(Op); - APInt ptrMask = APInt::getAllOnesValue(TD->getTypeAllocSizeInBits(Ty)); + APInt ptrMask = + APInt::getAllOnesValue(TD->getTypeAllocSizeInBits(Op->getType())); SmallString<40> S; ptrMask.toStringUnsigned(S); - O << ") & " << S.c_str() << ')'; + O << ") & " << S.str() << ')'; break; } case Instruction::Add: @@ -966,17 +917,17 @@ void AsmPrinter::EmitConstantValueOnly(const Constant *CV) { O << ')'; break; default: - assert(0 && "Unsupported operator!"); + llvm_unreachable("Unsupported operator!"); } } else { - assert(0 && "Unknown constant value!"); + llvm_unreachable("Unknown constant value!"); } } /// printAsCString - Print the specified array as a C compatible string, only if /// the predicate isString is true. /// -static void printAsCString(raw_ostream &O, const ConstantArray *CVA, +static void printAsCString(formatted_raw_ostream &O, const ConstantArray *CVA, unsigned LastElt) { assert(CVA->isString() && "Array is not string compatible!"); @@ -993,12 +944,12 @@ static void printAsCString(raw_ostream &O, const ConstantArray *CVA, /// void AsmPrinter::EmitString(const ConstantArray *CVA) const { unsigned NumElts = CVA->getNumOperands(); - if (TAI->getAscizDirective() && NumElts && + if (MAI->getAscizDirective() && NumElts && cast<ConstantInt>(CVA->getOperand(NumElts-1))->getZExtValue() == 0) { - O << TAI->getAscizDirective(); + O << MAI->getAscizDirective(); printAsCString(O, CVA, NumElts-1); } else { - O << TAI->getAsciiDirective(); + O << MAI->getAsciiDirective(); printAsCString(O, CVA, NumElts); } O << '\n'; @@ -1053,48 +1004,65 @@ void AsmPrinter::EmitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace) { // FP Constants are printed as integer constants to avoid losing // precision... 
+ LLVMContext &Context = CFP->getContext(); const TargetData *TD = TM.getTargetData(); - if (CFP->getType() == Type::DoubleTy) { + if (CFP->getType()->isDoubleTy()) { double Val = CFP->getValueAPF().convertToDouble(); // for comment only uint64_t i = CFP->getValueAPF().bitcastToAPInt().getZExtValue(); - if (TAI->getData64bitsDirective(AddrSpace)) { - O << TAI->getData64bitsDirective(AddrSpace) << i; - if (VerboseAsm) - O << '\t' << TAI->getCommentString() << " double value: " << Val; + if (MAI->getData64bitsDirective(AddrSpace)) { + O << MAI->getData64bitsDirective(AddrSpace) << i; + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() << " double " << Val; + } O << '\n'; } else if (TD->isBigEndian()) { - O << TAI->getData32bitsDirective(AddrSpace) << unsigned(i >> 32); - if (VerboseAsm) - O << '\t' << TAI->getCommentString() - << " double most significant word " << Val; + O << MAI->getData32bitsDirective(AddrSpace) << unsigned(i >> 32); + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() + << " most significant word of double " << Val; + } O << '\n'; - O << TAI->getData32bitsDirective(AddrSpace) << unsigned(i); - if (VerboseAsm) - O << '\t' << TAI->getCommentString() - << " double least significant word " << Val; + O << MAI->getData32bitsDirective(AddrSpace) << unsigned(i); + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() + << " least significant word of double " << Val; + } O << '\n'; } else { - O << TAI->getData32bitsDirective(AddrSpace) << unsigned(i); - if (VerboseAsm) - O << '\t' << TAI->getCommentString() - << " double least significant word " << Val; + O << MAI->getData32bitsDirective(AddrSpace) << unsigned(i); + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() + << " least significant word of double " << Val; + } O << '\n'; - O << TAI->getData32bitsDirective(AddrSpace) << unsigned(i >> 32); - 
if (VerboseAsm) - O << '\t' << TAI->getCommentString() - << " double most significant word " << Val; + O << MAI->getData32bitsDirective(AddrSpace) << unsigned(i >> 32); + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() + << " most significant word of double " << Val; + } O << '\n'; } return; - } else if (CFP->getType() == Type::FloatTy) { + } + + if (CFP->getType()->isFloatTy()) { float Val = CFP->getValueAPF().convertToFloat(); // for comment only - O << TAI->getData32bitsDirective(AddrSpace) + O << MAI->getData32bitsDirective(AddrSpace) << CFP->getValueAPF().bitcastToAPInt().getZExtValue(); - if (VerboseAsm) - O << '\t' << TAI->getCommentString() << " float " << Val; + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() << " float " << Val; + } O << '\n'; return; - } else if (CFP->getType() == Type::X86_FP80Ty) { + } + + if (CFP->getType()->isX86_FP80Ty()) { // all long double variants are printed as hex // api needed to prevent premature destruction APInt api = CFP->getValueAPF().bitcastToAPInt(); @@ -1105,110 +1073,148 @@ void AsmPrinter::EmitGlobalConstantFP(const ConstantFP *CFP, DoubleVal.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &ignored); if (TD->isBigEndian()) { - O << TAI->getData16bitsDirective(AddrSpace) << uint16_t(p[1]); - if (VerboseAsm) - O << '\t' << TAI->getCommentString() - << " long double most significant halfword of ~" + O << MAI->getData16bitsDirective(AddrSpace) << uint16_t(p[1]); + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() + << " most significant halfword of x86_fp80 ~" << DoubleVal.convertToDouble(); + } O << '\n'; - O << TAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0] >> 48); - if (VerboseAsm) - O << '\t' << TAI->getCommentString() << " long double next halfword"; + O << MAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0] >> 48); + if (VerboseAsm) { + 
O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() << " next halfword"; + } O << '\n'; - O << TAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0] >> 32); - if (VerboseAsm) - O << '\t' << TAI->getCommentString() << " long double next halfword"; + O << MAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0] >> 32); + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() << " next halfword"; + } O << '\n'; - O << TAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0] >> 16); - if (VerboseAsm) - O << '\t' << TAI->getCommentString() << " long double next halfword"; + O << MAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0] >> 16); + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() << " next halfword"; + } O << '\n'; - O << TAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0]); - if (VerboseAsm) - O << '\t' << TAI->getCommentString() - << " long double least significant halfword"; + O << MAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0]); + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() + << " least significant halfword"; + } O << '\n'; } else { - O << TAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0]); - if (VerboseAsm) - O << '\t' << TAI->getCommentString() - << " long double least significant halfword of ~" + O << MAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0]); + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() + << " least significant halfword of x86_fp80 ~" << DoubleVal.convertToDouble(); + } O << '\n'; - O << TAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0] >> 16); - if (VerboseAsm) - O << '\t' << TAI->getCommentString() - << " long double next halfword"; + O << MAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0] >> 16); + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() + << " next halfword"; + } 
O << '\n'; - O << TAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0] >> 32); - if (VerboseAsm) - O << '\t' << TAI->getCommentString() - << " long double next halfword"; + O << MAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0] >> 32); + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() + << " next halfword"; + } O << '\n'; - O << TAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0] >> 48); - if (VerboseAsm) - O << '\t' << TAI->getCommentString() - << " long double next halfword"; + O << MAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0] >> 48); + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() + << " next halfword"; + } O << '\n'; - O << TAI->getData16bitsDirective(AddrSpace) << uint16_t(p[1]); - if (VerboseAsm) - O << '\t' << TAI->getCommentString() - << " long double most significant halfword"; + O << MAI->getData16bitsDirective(AddrSpace) << uint16_t(p[1]); + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() + << " most significant halfword"; + } O << '\n'; } - EmitZeros(TD->getTypeAllocSize(Type::X86_FP80Ty) - - TD->getTypeStoreSize(Type::X86_FP80Ty), AddrSpace); + EmitZeros(TD->getTypeAllocSize(Type::getX86_FP80Ty(Context)) - + TD->getTypeStoreSize(Type::getX86_FP80Ty(Context)), AddrSpace); return; - } else if (CFP->getType() == Type::PPC_FP128Ty) { + } + + if (CFP->getType()->isPPC_FP128Ty()) { // all long double variants are printed as hex // api needed to prevent premature destruction APInt api = CFP->getValueAPF().bitcastToAPInt(); const uint64_t *p = api.getRawData(); if (TD->isBigEndian()) { - O << TAI->getData32bitsDirective(AddrSpace) << uint32_t(p[0] >> 32); - if (VerboseAsm) - O << '\t' << TAI->getCommentString() - << " long double most significant word"; + O << MAI->getData32bitsDirective(AddrSpace) << uint32_t(p[0] >> 32); + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << 
MAI->getCommentString() + << " most significant word of ppc_fp128"; + } O << '\n'; - O << TAI->getData32bitsDirective(AddrSpace) << uint32_t(p[0]); - if (VerboseAsm) - O << '\t' << TAI->getCommentString() - << " long double next word"; + O << MAI->getData32bitsDirective(AddrSpace) << uint32_t(p[0]); + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() + << " next word"; + } O << '\n'; - O << TAI->getData32bitsDirective(AddrSpace) << uint32_t(p[1] >> 32); - if (VerboseAsm) - O << '\t' << TAI->getCommentString() - << " long double next word"; + O << MAI->getData32bitsDirective(AddrSpace) << uint32_t(p[1] >> 32); + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() + << " next word"; + } O << '\n'; - O << TAI->getData32bitsDirective(AddrSpace) << uint32_t(p[1]); - if (VerboseAsm) - O << '\t' << TAI->getCommentString() - << " long double least significant word"; + O << MAI->getData32bitsDirective(AddrSpace) << uint32_t(p[1]); + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() + << " least significant word"; + } O << '\n'; } else { - O << TAI->getData32bitsDirective(AddrSpace) << uint32_t(p[1]); - if (VerboseAsm) - O << '\t' << TAI->getCommentString() - << " long double least significant word"; + O << MAI->getData32bitsDirective(AddrSpace) << uint32_t(p[1]); + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() + << " least significant word of ppc_fp128"; + } O << '\n'; - O << TAI->getData32bitsDirective(AddrSpace) << uint32_t(p[1] >> 32); - if (VerboseAsm) - O << '\t' << TAI->getCommentString() - << " long double next word"; + O << MAI->getData32bitsDirective(AddrSpace) << uint32_t(p[1] >> 32); + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() + << " next word"; + } O << '\n'; - O << TAI->getData32bitsDirective(AddrSpace) << uint32_t(p[0]); - if (VerboseAsm) - O << 
'\t' << TAI->getCommentString() - << " long double next word"; + O << MAI->getData32bitsDirective(AddrSpace) << uint32_t(p[0]); + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() + << " next word"; + } O << '\n'; - O << TAI->getData32bitsDirective(AddrSpace) << uint32_t(p[0] >> 32); - if (VerboseAsm) - O << '\t' << TAI->getCommentString() - << " long double most significant word"; + O << MAI->getData32bitsDirective(AddrSpace) << uint32_t(p[0] >> 32); + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() + << " most significant word"; + } O << '\n'; } return; - } else assert(0 && "Floating point constant type not handled"); + } else llvm_unreachable("Floating point constant type not handled"); } void AsmPrinter::EmitGlobalConstantLargeInt(const ConstantInt *CI, @@ -1229,29 +1235,37 @@ void AsmPrinter::EmitGlobalConstantLargeInt(const ConstantInt *CI, else Val = RawData[i]; - if (TAI->getData64bitsDirective(AddrSpace)) - O << TAI->getData64bitsDirective(AddrSpace) << Val << '\n'; + if (MAI->getData64bitsDirective(AddrSpace)) + O << MAI->getData64bitsDirective(AddrSpace) << Val << '\n'; else if (TD->isBigEndian()) { - O << TAI->getData32bitsDirective(AddrSpace) << unsigned(Val >> 32); - if (VerboseAsm) - O << '\t' << TAI->getCommentString() - << " Double-word most significant word " << Val; + O << MAI->getData32bitsDirective(AddrSpace) << unsigned(Val >> 32); + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() + << " most significant half of i64 " << Val; + } O << '\n'; - O << TAI->getData32bitsDirective(AddrSpace) << unsigned(Val); - if (VerboseAsm) - O << '\t' << TAI->getCommentString() - << " Double-word least significant word " << Val; + O << MAI->getData32bitsDirective(AddrSpace) << unsigned(Val); + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() + << " least significant half of i64 " << Val; + } O << 
'\n'; } else { - O << TAI->getData32bitsDirective(AddrSpace) << unsigned(Val); - if (VerboseAsm) - O << '\t' << TAI->getCommentString() - << " Double-word least significant word " << Val; + O << MAI->getData32bitsDirective(AddrSpace) << unsigned(Val); + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() + << " least significant half of i64 " << Val; + } O << '\n'; - O << TAI->getData32bitsDirective(AddrSpace) << unsigned(Val >> 32); - if (VerboseAsm) - O << '\t' << TAI->getCommentString() - << " Double-word most significant word " << Val; + O << MAI->getData32bitsDirective(AddrSpace) << unsigned(Val >> 32); + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() + << " most significant half of i64 " << Val; + } O << '\n'; } } @@ -1292,7 +1306,8 @@ void AsmPrinter::EmitGlobalConstant(const Constant *CV, unsigned AddrSpace) { if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) { SmallString<40> S; CI->getValue().toStringUnsigned(S, 16); - O << "\t\t\t" << TAI->getCommentString() << " 0x" << S.c_str(); + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() << " 0x" << S.str(); } } O << '\n'; @@ -1300,7 +1315,7 @@ void AsmPrinter::EmitGlobalConstant(const Constant *CV, unsigned AddrSpace) { void AsmPrinter::EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) { // Target doesn't support this yet! - abort(); + llvm_unreachable("Target does not support EmitMachineConstantPoolValue"); } /// PrintSpecial - Print information related to the specified machine instr @@ -1311,10 +1326,10 @@ void AsmPrinter::EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) { /// for their own strange codes. 
void AsmPrinter::PrintSpecial(const MachineInstr *MI, const char *Code) const { if (!strcmp(Code, "private")) { - O << TAI->getPrivateGlobalPrefix(); + O << MAI->getPrivateGlobalPrefix(); } else if (!strcmp(Code, "comment")) { if (VerboseAsm) - O << TAI->getCommentString(); + O << MAI->getCommentString(); } else if (!strcmp(Code, "uid")) { // Comparing the address of MI isn't sufficient, because machineinstrs may // be allocated to the same address across functions. @@ -1328,23 +1343,38 @@ void AsmPrinter::PrintSpecial(const MachineInstr *MI, const char *Code) const { } O << Counter; } else { - cerr << "Unknown special formatter '" << Code + std::string msg; + raw_string_ostream Msg(msg); + Msg << "Unknown special formatter '" << Code << "' for machine instr: " << *MI; - exit(1); + llvm_report_error(Msg.str()); } } /// processDebugLoc - Processes the debug information of each machine /// instruction's DebugLoc. -void AsmPrinter::processDebugLoc(DebugLoc DL) { - if (TAI->doesSupportDebugInformation() && DW->ShouldEmitDwarfDebug()) { +void AsmPrinter::processDebugLoc(const MachineInstr *MI, + bool BeforePrintingInsn) { + if (!MAI || !DW) + return; + DebugLoc DL = MI->getDebugLoc(); + if (MAI->doesSupportDebugInformation() && DW->ShouldEmitDwarfDebug()) { if (!DL.isUnknown()) { DebugLocTuple CurDLT = MF->getDebugLocTuple(DL); - - if (CurDLT.CompileUnit != 0 && PrevDLT != CurDLT) - printLabel(DW->RecordSourceLine(CurDLT.Line, CurDLT.Col, - DICompileUnit(CurDLT.CompileUnit))); - + if (BeforePrintingInsn) { + if (CurDLT.Scope != 0 && PrevDLT != CurDLT) { + unsigned L = DW->RecordSourceLine(CurDLT.Line, CurDLT.Col, + CurDLT.Scope); + printLabel(L); +#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN + DW->SetDbgScopeBeginLabels(MI, L); +#endif + } else { +#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN + DW->SetDbgScopeEndLabels(MI, 0); +#endif + } + } PrevDLT = CurDLT; } } @@ -1369,14 +1399,15 @@ void AsmPrinter::printInlineAsm(const MachineInstr *MI) const { // If this asmstr is empty, just print 
the #APP/#NOAPP markers. // These are useful to see where empty asm's wound up. if (AsmStr[0] == 0) { - O << TAI->getInlineAsmStart() << "\n\t" << TAI->getInlineAsmEnd() << '\n'; + O << MAI->getCommentString() << MAI->getInlineAsmStart() << "\n\t"; + O << MAI->getCommentString() << MAI->getInlineAsmEnd() << '\n'; return; } - O << TAI->getInlineAsmStart() << "\n\t"; + O << MAI->getCommentString() << MAI->getInlineAsmStart() << "\n\t"; // The variant of the current asmprinter. - int AsmPrinterVariant = TAI->getAssemblerDialect(); + int AsmPrinterVariant = MAI->getAssemblerDialect(); int CurVariant = -1; // The number of the {.|.|.} region we are in. const char *LastEmitted = AsmStr; // One past the last character emitted. @@ -1413,9 +1444,8 @@ void AsmPrinter::printInlineAsm(const MachineInstr *MI) const { case '(': // $( -> same as GCC's { character. ++LastEmitted; // Consume '(' character. if (CurVariant != -1) { - cerr << "Nested variants found in inline asm string: '" - << AsmStr << "'\n"; - exit(1); + llvm_report_error("Nested variants found in inline asm string: '" + + std::string(AsmStr) + "'"); } CurVariant = 0; // We're in the first variant now. break; @@ -1450,9 +1480,8 @@ void AsmPrinter::printInlineAsm(const MachineInstr *MI) const { const char *StrStart = LastEmitted; const char *StrEnd = strchr(StrStart, '}'); if (StrEnd == 0) { - cerr << "Unterminated ${:foo} operand in inline asm string: '" - << AsmStr << "'\n"; - exit(1); + llvm_report_error("Unterminated ${:foo} operand in inline asm string: '" + + std::string(AsmStr) + "'"); } std::string Val(StrStart, StrEnd); @@ -1466,9 +1495,8 @@ void AsmPrinter::printInlineAsm(const MachineInstr *MI) const { errno = 0; long Val = strtol(IDStart, &IDEnd, 10); // We only accept numbers for IDs. 
if (!isdigit(*IDStart) || (Val == 0 && errno == EINVAL)) { - cerr << "Bad $ operand number in inline asm string: '" - << AsmStr << "'\n"; - exit(1); + llvm_report_error("Bad $ operand number in inline asm string: '" + + std::string(AsmStr) + "'"); } LastEmitted = IDEnd; @@ -1480,9 +1508,8 @@ void AsmPrinter::printInlineAsm(const MachineInstr *MI) const { if (*LastEmitted == ':') { ++LastEmitted; // Consume ':' character. if (*LastEmitted == 0) { - cerr << "Bad ${:} expression in inline asm string: '" - << AsmStr << "'\n"; - exit(1); + llvm_report_error("Bad ${:} expression in inline asm string: '" + + std::string(AsmStr) + "'"); } Modifier[0] = *LastEmitted; @@ -1490,17 +1517,15 @@ void AsmPrinter::printInlineAsm(const MachineInstr *MI) const { } if (*LastEmitted != '}') { - cerr << "Bad ${} expression in inline asm string: '" - << AsmStr << "'\n"; - exit(1); + llvm_report_error("Bad ${} expression in inline asm string: '" + + std::string(AsmStr) + "'"); } ++LastEmitted; // Consume '}' character. } if ((unsigned)Val >= NumOperands-1) { - cerr << "Invalid $ operand number in inline asm string: '" - << AsmStr << "'\n"; - exit(1); + llvm_report_error("Invalid $ operand number in inline asm string: '" + + std::string(AsmStr) + "'"); } // Okay, we finally have a value number. Ask the target to print this @@ -1524,8 +1549,8 @@ void AsmPrinter::printInlineAsm(const MachineInstr *MI) const { ++OpNo; // Skip over the ID number. 
if (Modifier[0]=='l') // labels are target independent - printBasicBlockLabel(MI->getOperand(OpNo).getMBB(), - false, false, false); + GetMBBSymbol(MI->getOperand(OpNo).getMBB() + ->getNumber())->print(O, MAI); else { AsmPrinter *AP = const_cast<AsmPrinter*>(this); if ((OpFlags & 7) == 4) { @@ -1538,25 +1563,28 @@ void AsmPrinter::printInlineAsm(const MachineInstr *MI) const { } } if (Error) { - cerr << "Invalid operand found in inline asm: '" + std::string msg; + raw_string_ostream Msg(msg); + Msg << "Invalid operand found in inline asm: '" << AsmStr << "'\n"; - MI->dump(); - exit(1); + MI->print(Msg); + llvm_report_error(Msg.str()); } } break; } } } - O << "\n\t" << TAI->getInlineAsmEnd() << '\n'; + O << "\n\t" << MAI->getCommentString() << MAI->getInlineAsmEnd(); } /// printImplicitDef - This method prints the specified machine instruction /// that is an implicit def. void AsmPrinter::printImplicitDef(const MachineInstr *MI) const { - if (VerboseAsm) - O << '\t' << TAI->getCommentString() << " implicit-def: " - << TRI->getAsmName(MI->getOperand(0).getReg()) << '\n'; + if (!VerboseAsm) return; + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() << " implicit-def: " + << TRI->getName(MI->getOperand(0).getReg()); } /// printLabel - This method prints a local label used by debug and @@ -1566,17 +1594,7 @@ void AsmPrinter::printLabel(const MachineInstr *MI) const { } void AsmPrinter::printLabel(unsigned Id) const { - O << TAI->getPrivateGlobalPrefix() << "label" << Id << ":\n"; -} - -/// printDeclare - This method prints a local variable declaration used by -/// debug tables. -/// FIXME: It doesn't really print anything rather it inserts a DebugVariable -/// entry into dwarf table. 
-void AsmPrinter::printDeclare(const MachineInstr *MI) const { - unsigned FI = MI->getOperand(0).getIndex(); - GlobalValue *GV = MI->getOperand(1).getGlobal(); - DW->RecordVariable(cast<GlobalVariable>(GV), FI, MI); + O << MAI->getPrivateGlobalPrefix() << "label" << Id << ':'; } /// PrintAsmOperand - Print the specified operand of MI, an INLINEASM @@ -1595,51 +1613,69 @@ bool AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, return true; } -/// printBasicBlockLabel - This method prints the label for the specified -/// MachineBasicBlock -void AsmPrinter::printBasicBlockLabel(const MachineBasicBlock *MBB, - bool printAlign, - bool printColon, - bool printComment) const { - if (printAlign) { - unsigned Align = MBB->getAlignment(); - if (Align) - EmitAlignment(Log2_32(Align)); - } +MCSymbol *AsmPrinter::GetMBBSymbol(unsigned MBBID) const { + SmallString<60> Name; + raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix() << "BB" + << getFunctionNumber() << '_' << MBBID; + + return OutContext.GetOrCreateSymbol(Name.str()); +} + - O << TAI->getPrivateGlobalPrefix() << "BB" << getFunctionNumber() << '_' - << MBB->getNumber(); - if (printColon) +/// EmitBasicBlockStart - This method prints the label for the specified +/// MachineBasicBlock, an alignment (if present) and a comment describing +/// it if appropriate. 
+void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock *MBB) const { + if (unsigned Align = MBB->getAlignment()) + EmitAlignment(Log2_32(Align)); + + if (MBB->pred_empty() || MBB->isOnlyReachableByFallthrough()) { + if (VerboseAsm) + O << MAI->getCommentString() << " BB#" << MBB->getNumber() << ':'; + } else { + GetMBBSymbol(MBB->getNumber())->print(O, MAI); O << ':'; - if (printComment && MBB->getBasicBlock()) - O << '\t' << TAI->getCommentString() << ' ' - << MBB->getBasicBlock()->getNameStart(); + if (!VerboseAsm) + O << '\n'; + } + + if (VerboseAsm) { + if (const BasicBlock *BB = MBB->getBasicBlock()) + if (BB->hasName()) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() << ' '; + WriteAsOperand(O, BB, /*PrintType=*/false); + } + + EmitComments(*MBB); + O << '\n'; + } } /// printPICJumpTableSetLabel - This method prints a set label for the /// specified MachineBasicBlock for a jumptable entry. void AsmPrinter::printPICJumpTableSetLabel(unsigned uid, const MachineBasicBlock *MBB) const { - if (!TAI->getSetDirective()) + if (!MAI->getSetDirective()) return; - O << TAI->getSetDirective() << ' ' << TAI->getPrivateGlobalPrefix() + O << MAI->getSetDirective() << ' ' << MAI->getPrivateGlobalPrefix() << getFunctionNumber() << '_' << uid << "_set_" << MBB->getNumber() << ','; - printBasicBlockLabel(MBB, false, false, false); - O << '-' << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() + GetMBBSymbol(MBB->getNumber())->print(O, MAI); + O << '-' << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() << '_' << uid << '\n'; } void AsmPrinter::printPICJumpTableSetLabel(unsigned uid, unsigned uid2, const MachineBasicBlock *MBB) const { - if (!TAI->getSetDirective()) + if (!MAI->getSetDirective()) return; - O << TAI->getSetDirective() << ' ' << TAI->getPrivateGlobalPrefix() + O << MAI->getSetDirective() << ' ' << MAI->getPrivateGlobalPrefix() << getFunctionNumber() << '_' << uid << '_' << uid2 << "_set_" << 
MBB->getNumber() << ','; - printBasicBlockLabel(MBB, false, false, false); - O << '-' << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() + GetMBBSymbol(MBB->getNumber())->print(O, MAI); + O << '-' << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() << '_' << uid << '_' << uid2 << '\n'; } @@ -1648,73 +1684,51 @@ void AsmPrinter::printPICJumpTableSetLabel(unsigned uid, unsigned uid2, void AsmPrinter::printDataDirective(const Type *type, unsigned AddrSpace) { const TargetData *TD = TM.getTargetData(); switch (type->getTypeID()) { + case Type::FloatTyID: case Type::DoubleTyID: + case Type::X86_FP80TyID: case Type::FP128TyID: case Type::PPC_FP128TyID: + assert(0 && "Should have already output floating point constant."); + default: + assert(0 && "Can't handle printing this type of thing"); case Type::IntegerTyID: { unsigned BitWidth = cast<IntegerType>(type)->getBitWidth(); if (BitWidth <= 8) - O << TAI->getData8bitsDirective(AddrSpace); + O << MAI->getData8bitsDirective(AddrSpace); else if (BitWidth <= 16) - O << TAI->getData16bitsDirective(AddrSpace); + O << MAI->getData16bitsDirective(AddrSpace); else if (BitWidth <= 32) - O << TAI->getData32bitsDirective(AddrSpace); + O << MAI->getData32bitsDirective(AddrSpace); else if (BitWidth <= 64) { - assert(TAI->getData64bitsDirective(AddrSpace) && + assert(MAI->getData64bitsDirective(AddrSpace) && "Target cannot handle 64-bit constant exprs!"); - O << TAI->getData64bitsDirective(AddrSpace); + O << MAI->getData64bitsDirective(AddrSpace); } else { - assert(0 && "Target cannot handle given data directive width!"); + llvm_unreachable("Target cannot handle given data directive width!"); } break; } case Type::PointerTyID: if (TD->getPointerSize() == 8) { - assert(TAI->getData64bitsDirective(AddrSpace) && + assert(MAI->getData64bitsDirective(AddrSpace) && "Target cannot handle 64-bit pointer exprs!"); - O << TAI->getData64bitsDirective(AddrSpace); + O << MAI->getData64bitsDirective(AddrSpace); } else if 
(TD->getPointerSize() == 2) { - O << TAI->getData16bitsDirective(AddrSpace); + O << MAI->getData16bitsDirective(AddrSpace); } else if (TD->getPointerSize() == 1) { - O << TAI->getData8bitsDirective(AddrSpace); + O << MAI->getData8bitsDirective(AddrSpace); } else { - O << TAI->getData32bitsDirective(AddrSpace); + O << MAI->getData32bitsDirective(AddrSpace); } break; - case Type::FloatTyID: case Type::DoubleTyID: - case Type::X86_FP80TyID: case Type::FP128TyID: case Type::PPC_FP128TyID: - assert (0 && "Should have already output floating point constant."); - default: - assert (0 && "Can't handle printing this type of thing"); - break; } } -void AsmPrinter::printSuffixedName(const char *Name, const char *Suffix, - const char *Prefix) { - if (Name[0]=='\"') - O << '\"'; - O << TAI->getPrivateGlobalPrefix(); - if (Prefix) O << Prefix; - if (Name[0]=='\"') - O << '\"'; - if (Name[0]=='\"') - O << Name[1]; - else - O << Name; - O << Suffix; - if (Name[0]=='\"') - O << '\"'; -} - -void AsmPrinter::printSuffixedName(const std::string &Name, const char* Suffix) { - printSuffixedName(Name.c_str(), Suffix); -} - void AsmPrinter::printVisibility(const std::string& Name, unsigned Visibility) const { if (Visibility == GlobalValue::HiddenVisibility) { - if (const char *Directive = TAI->getHiddenDirective()) + if (const char *Directive = MAI->getHiddenDirective()) O << Directive << Name << '\n'; } else if (Visibility == GlobalValue::ProtectedVisibility) { - if (const char *Directive = TAI->getProtectedDirective()) + if (const char *Directive = MAI->getProtectedDirective()) O << Directive << Name << '\n'; } } @@ -1746,6 +1760,104 @@ GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy *S) { return GMP; } - cerr << "no GCMetadataPrinter registered for GC: " << Name << "\n"; - abort(); + errs() << "no GCMetadataPrinter registered for GC: " << Name << "\n"; + llvm_unreachable(0); +} + +/// EmitComments - Pretty-print comments for instructions +void 
AsmPrinter::EmitComments(const MachineInstr &MI) const { + assert(VerboseAsm && !MI.getDebugLoc().isUnknown()); + + DebugLocTuple DLT = MF->getDebugLocTuple(MI.getDebugLoc()); + + // Print source line info. + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() << " SrcLine "; + if (DLT.Scope) { + DICompileUnit CU(DLT.Scope); + if (!CU.isNull()) + O << CU.getFilename() << " "; + } + O << DLT.Line; + if (DLT.Col != 0) + O << ":" << DLT.Col; +} + +/// PrintChildLoopComment - Print comments about child loops within +/// the loop for this basic block, with nesting. +/// +static void PrintChildLoopComment(formatted_raw_ostream &O, + const MachineLoop *loop, + const MCAsmInfo *MAI, + int FunctionNumber) { + // Add child loop information + for(MachineLoop::iterator cl = loop->begin(), + clend = loop->end(); + cl != clend; + ++cl) { + MachineBasicBlock *Header = (*cl)->getHeader(); + assert(Header && "No header for loop"); + + O << '\n'; + O.PadToColumn(MAI->getCommentColumn()); + + O << MAI->getCommentString(); + O.indent(((*cl)->getLoopDepth()-1)*2) + << " Child Loop BB" << FunctionNumber << "_" + << Header->getNumber() << " Depth " << (*cl)->getLoopDepth(); + + PrintChildLoopComment(O, *cl, MAI, FunctionNumber); + } +} + +/// EmitComments - Pretty-print comments for basic blocks +void AsmPrinter::EmitComments(const MachineBasicBlock &MBB) const +{ + if (VerboseAsm) { + // Add loop depth information + const MachineLoop *loop = LI->getLoopFor(&MBB); + + if (loop) { + // Print a newline after bb# annotation. 
+ O << "\n"; + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() << " Loop Depth " << loop->getLoopDepth() + << '\n'; + + O.PadToColumn(MAI->getCommentColumn()); + + MachineBasicBlock *Header = loop->getHeader(); + assert(Header && "No header for loop"); + + if (Header == &MBB) { + O << MAI->getCommentString() << " Loop Header"; + PrintChildLoopComment(O, loop, MAI, getFunctionNumber()); + } + else { + O << MAI->getCommentString() << " Loop Header is BB" + << getFunctionNumber() << "_" << loop->getHeader()->getNumber(); + } + + if (loop->empty()) { + O << '\n'; + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() << " Inner Loop"; + } + + // Add parent loop information + for (const MachineLoop *CurLoop = loop->getParentLoop(); + CurLoop; + CurLoop = CurLoop->getParentLoop()) { + MachineBasicBlock *Header = CurLoop->getHeader(); + assert(Header && "No header for loop"); + + O << '\n'; + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString(); + O.indent((CurLoop->getLoopDepth()-1)*2) + << " Inside Loop BB" << getFunctionNumber() << "_" + << Header->getNumber() << " Depth " << CurLoop->getLoopDepth(); + } + } + } } diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp index 01c431c..ecf0007 100644 --- a/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/lib/CodeGen/AsmPrinter/DIE.cpp @@ -14,9 +14,10 @@ #include "DIE.h" #include "DwarfPrinter.h" #include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/Target/TargetData.h" -#include <ostream> +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Format.h" using namespace llvm; //===----------------------------------------------------------------------===// @@ -75,24 +76,24 @@ void DIEAbbrev::Emit(const AsmPrinter *Asm) const { } #ifndef NDEBUG -void DIEAbbrev::print(std::ostream &O) { +void DIEAbbrev::print(raw_ostream &O) { O << "Abbreviation @" - << std::hex << (intptr_t)this 
<< std::dec + << format("0x%lx", (long)(intptr_t)this) << " " << dwarf::TagString(Tag) << " " << dwarf::ChildrenString(ChildrenFlag) - << "\n"; + << '\n'; for (unsigned i = 0, N = Data.size(); i < N; ++i) { O << " " << dwarf::AttributeString(Data[i].getAttribute()) << " " << dwarf::FormEncodingString(Data[i].getForm()) - << "\n"; + << '\n'; } } -void DIEAbbrev::dump() { print(cerr); } +void DIEAbbrev::dump() { print(errs()); } #endif //===----------------------------------------------------------------------===// @@ -125,7 +126,7 @@ void DIE::Profile(FoldingSetNodeID &ID) { } #ifndef NDEBUG -void DIE::print(std::ostream &O, unsigned IncIndent) { +void DIE::print(raw_ostream &O, unsigned IncIndent) { IndentCount += IncIndent; const std::string Indent(IndentCount, ' '); bool isBlock = Abbrev.getTag() == 0; @@ -133,7 +134,7 @@ void DIE::print(std::ostream &O, unsigned IncIndent) { if (!isBlock) { O << Indent << "Die: " - << "0x" << std::hex << (intptr_t)this << std::dec + << format("0x%lx", (long)(intptr_t)this) << ", Offset: " << Offset << ", Size: " << Size << "\n"; @@ -175,14 +176,14 @@ void DIE::print(std::ostream &O, unsigned IncIndent) { } void DIE::dump() { - print(cerr); + print(errs()); } #endif #ifndef NDEBUG void DIEValue::dump() { - print(cerr); + print(errs()); } #endif @@ -206,7 +207,7 @@ void DIEInteger::EmitValue(Dwarf *D, unsigned Form) const { case dwarf::DW_FORM_data8: Asm->EmitInt64(Integer); break; case dwarf::DW_FORM_udata: Asm->EmitULEB128Bytes(Integer); break; case dwarf::DW_FORM_sdata: Asm->EmitSLEB128Bytes(Integer); break; - default: assert(0 && "DIE Value form not supported yet"); break; + default: llvm_unreachable("DIE Value form not supported yet"); } } @@ -223,9 +224,9 @@ unsigned DIEInteger::SizeOf(const TargetData *TD, unsigned Form) const { case dwarf::DW_FORM_data4: return sizeof(int32_t); case dwarf::DW_FORM_ref8: // Fall thru case dwarf::DW_FORM_data8: return sizeof(int64_t); - case dwarf::DW_FORM_udata: return 
TargetAsmInfo::getULEB128Size(Integer); - case dwarf::DW_FORM_sdata: return TargetAsmInfo::getSLEB128Size(Integer); - default: assert(0 && "DIE Value form not supported yet"); break; + case dwarf::DW_FORM_udata: return MCAsmInfo::getULEB128Size(Integer); + case dwarf::DW_FORM_sdata: return MCAsmInfo::getSLEB128Size(Integer); + default: llvm_unreachable("DIE Value form not supported yet"); break; } return 0; } @@ -241,9 +242,9 @@ void DIEInteger::Profile(FoldingSetNodeID &ID) { } #ifndef NDEBUG -void DIEInteger::print(std::ostream &O) { +void DIEInteger::print(raw_ostream &O) { O << "Int: " << (int64_t)Integer - << " 0x" << std::hex << Integer << std::dec; + << format(" 0x%llx", (unsigned long long)Integer); } #endif @@ -268,7 +269,7 @@ void DIEString::Profile(FoldingSetNodeID &ID) { } #ifndef NDEBUG -void DIEString::print(std::ostream &O) { +void DIEString::print(raw_ostream &O) { O << "Str: \"" << Str << "\""; } #endif @@ -302,7 +303,7 @@ void DIEDwarfLabel::Profile(FoldingSetNodeID &ID) { } #ifndef NDEBUG -void DIEDwarfLabel::print(std::ostream &O) { +void DIEDwarfLabel::print(raw_ostream &O) { O << "Lbl: "; Label.print(O); } @@ -337,7 +338,7 @@ void DIEObjectLabel::Profile(FoldingSetNodeID &ID) { } #ifndef NDEBUG -void DIEObjectLabel::print(std::ostream &O) { +void DIEObjectLabel::print(raw_ostream &O) { O << "Obj: " << Label; } #endif @@ -377,7 +378,7 @@ void DIESectionOffset::Profile(FoldingSetNodeID &ID) { } #ifndef NDEBUG -void DIESectionOffset::print(std::ostream &O) { +void DIESectionOffset::print(raw_ostream &O) { O << "Off: "; Label.print(O); O << "-"; @@ -417,7 +418,7 @@ void DIEDelta::Profile(FoldingSetNodeID &ID) { } #ifndef NDEBUG -void DIEDelta::print(std::ostream &O) { +void DIEDelta::print(raw_ostream &O) { O << "Del: "; LabelHi.print(O); O << "-"; @@ -451,8 +452,8 @@ void DIEEntry::Profile(FoldingSetNodeID &ID) { } #ifndef NDEBUG -void DIEEntry::print(std::ostream &O) { - O << "Die: 0x" << std::hex << (intptr_t)Entry << std::dec; +void 
DIEEntry::print(raw_ostream &O) { + O << format("Die: 0x%lx", (long)(intptr_t)Entry); } #endif @@ -481,7 +482,7 @@ void DIEBlock::EmitValue(Dwarf *D, unsigned Form) const { case dwarf::DW_FORM_block2: Asm->EmitInt16(Size); break; case dwarf::DW_FORM_block4: Asm->EmitInt32(Size); break; case dwarf::DW_FORM_block: Asm->EmitULEB128Bytes(Size); break; - default: assert(0 && "Improper form for block"); break; + default: llvm_unreachable("Improper form for block"); break; } const SmallVector<DIEAbbrevData, 8> &AbbrevData = Abbrev.getData(); @@ -498,8 +499,8 @@ unsigned DIEBlock::SizeOf(const TargetData *TD, unsigned Form) const { case dwarf::DW_FORM_block1: return Size + sizeof(int8_t); case dwarf::DW_FORM_block2: return Size + sizeof(int16_t); case dwarf::DW_FORM_block4: return Size + sizeof(int32_t); - case dwarf::DW_FORM_block: return Size + TargetAsmInfo::getULEB128Size(Size); - default: assert(0 && "Improper form for block"); break; + case dwarf::DW_FORM_block: return Size + MCAsmInfo::getULEB128Size(Size); + default: llvm_unreachable("Improper form for block"); break; } return 0; } @@ -510,7 +511,7 @@ void DIEBlock::Profile(FoldingSetNodeID &ID) { } #ifndef NDEBUG -void DIEBlock::print(std::ostream &O) { +void DIEBlock::print(raw_ostream &O) { O << "Blk: "; DIE::print(O, 5); } diff --git a/lib/CodeGen/AsmPrinter/DIE.h b/lib/CodeGen/AsmPrinter/DIE.h index 5b60327..62b51ec 100644 --- a/lib/CodeGen/AsmPrinter/DIE.h +++ b/lib/CodeGen/AsmPrinter/DIE.h @@ -19,8 +19,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Dwarf.h" -#include "llvm/Support/raw_ostream.h" -#include <iosfwd> +#include <vector> namespace llvm { class AsmPrinter; @@ -103,10 +102,7 @@ namespace llvm { void Emit(const AsmPrinter *Asm) const; #ifndef NDEBUG - void print(std::ostream *O) { - if (O) print(*O); - } - void print(std::ostream &O); + void print(raw_ostream &O); void dump(); #endif }; @@ -198,10 +194,7 @@ namespace llvm { void 
Profile(FoldingSetNodeID &ID) ; #ifndef NDEBUG - void print(std::ostream *O, unsigned IncIndent = 0) { - if (O) print(*O, IncIndent); - } - void print(std::ostream &O, unsigned IncIndent = 0); + void print(raw_ostream &O, unsigned IncIndent = 0); void dump(); #endif }; @@ -248,10 +241,7 @@ namespace llvm { static bool classof(const DIEValue *) { return true; } #ifndef NDEBUG - void print(std::ostream *O) { - if (O) print(*O); - } - virtual void print(std::ostream &O) = 0; + virtual void print(raw_ostream &O) = 0; void dump(); #endif }; @@ -297,7 +287,7 @@ namespace llvm { static bool classof(const DIEValue *I) { return I->getType() == isInteger; } #ifndef NDEBUG - virtual void print(std::ostream &O); + virtual void print(raw_ostream &O); #endif }; @@ -329,7 +319,7 @@ namespace llvm { static bool classof(const DIEValue *S) { return S->getType() == isString; } #ifndef NDEBUG - virtual void print(std::ostream &O); + virtual void print(raw_ostream &O); #endif }; @@ -359,7 +349,7 @@ namespace llvm { static bool classof(const DIEValue *L) { return L->getType() == isLabel; } #ifndef NDEBUG - virtual void print(std::ostream &O); + virtual void print(raw_ostream &O); #endif }; @@ -392,7 +382,7 @@ namespace llvm { } #ifndef NDEBUG - virtual void print(std::ostream &O); + virtual void print(raw_ostream &O); #endif }; @@ -431,7 +421,7 @@ namespace llvm { } #ifndef NDEBUG - virtual void print(std::ostream &O); + virtual void print(raw_ostream &O); #endif }; @@ -464,7 +454,7 @@ namespace llvm { static bool classof(const DIEValue *D) { return D->getType() == isDelta; } #ifndef NDEBUG - virtual void print(std::ostream &O); + virtual void print(raw_ostream &O); #endif }; @@ -500,7 +490,7 @@ namespace llvm { static bool classof(const DIEValue *E) { return E->getType() == isEntry; } #ifndef NDEBUG - virtual void print(std::ostream &O); + virtual void print(raw_ostream &O); #endif }; @@ -544,7 +534,7 @@ namespace llvm { static bool classof(const DIEValue *E) { return E->getType() == 
isBlock; } #ifndef NDEBUG - virtual void print(std::ostream &O); + virtual void print(raw_ostream &O); #endif }; diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 547140f..4394ec0 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -10,16 +10,24 @@ // This file contains support for writing dwarf debug info into asm files. // //===----------------------------------------------------------------------===// - +#define DEBUG_TYPE "dwarfdebug" #include "DwarfDebug.h" #include "llvm/Module.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/Support/Timer.h" -#include "llvm/System/Path.h" -#include "llvm/Target/TargetAsmInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/MC/MCSection.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Mangler.h" +#include "llvm/Support/Timer.h" +#include "llvm/System/Path.h" using namespace llvm; static TimerGroup &getDwarfTimerGroup() { @@ -51,11 +59,13 @@ class VISIBILITY_HIDDEN CompileUnit { /// GVToDieMap - Tracks the mapping of unit level debug informaton /// variables to debug information entries. - std::map<GlobalVariable *, DIE *> GVToDieMap; + /// FIXME : Rename GVToDieMap -> NodeToDieMap + std::map<MDNode *, DIE *> GVToDieMap; /// GVToDIEEntryMap - Tracks the mapping of unit level debug informaton /// descriptors to debug information entries using a DIEEntry proxy. 
- std::map<GlobalVariable *, DIEEntry *> GVToDIEEntryMap; + /// FIXME : Rename + std::map<MDNode *, DIEEntry *> GVToDIEEntryMap; /// Globals - A map of globally visible named entities for this unit. /// @@ -84,12 +94,12 @@ public: /// getDieMapSlotFor - Returns the debug information entry map slot for the /// specified debug variable. - DIE *&getDieMapSlotFor(GlobalVariable *GV) { return GVToDieMap[GV]; } + DIE *&getDieMapSlotFor(MDNode *N) { return GVToDieMap[N]; } - /// getDIEEntrySlotFor - Returns the debug information entry proxy slot for the - /// specified debug variable. - DIEEntry *&getDIEEntrySlotFor(GlobalVariable *GV) { - return GVToDIEEntryMap[GV]; + /// getDIEEntrySlotFor - Returns the debug information entry proxy slot for + /// the specified debug variable. + DIEEntry *&getDIEEntrySlotFor(MDNode *N) { + return GVToDIEEntryMap[N]; } /// AddDie - Adds or interns the DIE to the compile unit. @@ -138,15 +148,18 @@ class VISIBILITY_HIDDEN DbgScope { // Either subprogram or block. unsigned StartLabelID; // Label ID of the beginning of scope. unsigned EndLabelID; // Label ID of the end of scope. + const MachineInstr *LastInsn; // Last instruction of this scope. + const MachineInstr *FirstInsn; // First instruction of this scope. SmallVector<DbgScope *, 4> Scopes; // Scopes defined in scope. SmallVector<DbgVariable *, 8> Variables;// Variables declared in scope. SmallVector<DbgConcreteScope *, 8> ConcreteInsts;// Concrete insts of funcs. - + // Private state for dump() mutable unsigned IndentLevel; public: DbgScope(DbgScope *P, DIDescriptor D) - : Parent(P), Desc(D), StartLabelID(0), EndLabelID(0), IndentLevel(0) {} + : Parent(P), Desc(D), StartLabelID(0), EndLabelID(0), LastInsn(0), + FirstInsn(0), IndentLevel(0) {} virtual ~DbgScope(); // Accessors. 
@@ -159,7 +172,10 @@ public: SmallVector<DbgConcreteScope*,8> &getConcreteInsts() { return ConcreteInsts; } void setStartLabelID(unsigned S) { StartLabelID = S; } void setEndLabelID(unsigned E) { EndLabelID = E; } - + void setLastInsn(const MachineInstr *MI) { LastInsn = MI; } + const MachineInstr *getLastInsn() { return LastInsn; } + void setFirstInsn(const MachineInstr *MI) { FirstInsn = MI; } + const MachineInstr *getFirstInsn() { return FirstInsn; } /// AddScope - Add a scope to the scope. /// void AddScope(DbgScope *S) { Scopes.push_back(S); } @@ -172,6 +188,21 @@ public: /// void AddConcreteInst(DbgConcreteScope *C) { ConcreteInsts.push_back(C); } + void FixInstructionMarkers() { + assert (getFirstInsn() && "First instruction is missing!"); + if (getLastInsn()) + return; + + // If a scope does not have an instruction to mark an end then use + // the end of last child scope. + SmallVector<DbgScope *, 4> &Scopes = getScopes(); + assert (!Scopes.empty() && "Inner most scope does not have last insn!"); + DbgScope *L = Scopes.back(); + if (!L->getLastInsn()) + L->FixInstructionMarkers(); + setLastInsn(L->getLastInsn()); + } + #ifndef NDEBUG void dump() const; #endif @@ -179,10 +210,10 @@ public: #ifndef NDEBUG void DbgScope::dump() const { - std::string Indent(IndentLevel, ' '); - - cerr << Indent; Desc.dump(); - cerr << " [" << StartLabelID << ", " << EndLabelID << "]\n"; + raw_ostream &err = errs(); + err.indent(IndentLevel); + Desc.dump(); + err << " [" << StartLabelID << ", " << EndLabelID << "]\n"; IndentLevel += 2; @@ -220,10 +251,10 @@ DbgScope::~DbgScope() { } // end llvm namespace -DwarfDebug::DwarfDebug(raw_ostream &OS, AsmPrinter *A, const TargetAsmInfo *T) +DwarfDebug::DwarfDebug(raw_ostream &OS, AsmPrinter *A, const MCAsmInfo *T) : Dwarf(OS, A, T, "dbg"), ModuleCU(0), AbbreviationsSet(InitAbbreviationsSetSize), Abbreviations(), - ValuesSet(InitValuesSetSize), Values(), StringPool(), SectionMap(), + ValuesSet(InitValuesSetSize), Values(), StringPool(), 
SectionSourceLines(), didInitial(false), shouldEmit(false), FunctionDbgScope(0), DebugTimer(0) { if (TimePassesIsEnabled) @@ -234,7 +265,7 @@ DwarfDebug::~DwarfDebug() { for (unsigned j = 0, M = Values.size(); j < M; ++j) delete Values[j]; - for (DenseMap<const GlobalVariable *, DbgScope *>::iterator + for (DenseMap<const MDNode *, DbgScope *>::iterator I = AbstractInstanceRootMap.begin(), E = AbstractInstanceRootMap.end(); I != E;++I) delete I->second; @@ -479,6 +510,27 @@ void DwarfDebug::AddSourceLine(DIE *Die, const DIGlobal *G) { AddUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); AddUInt(Die, dwarf::DW_AT_decl_line, 0, Line); } + +/// AddSourceLine - Add location information to specified debug information +/// entry. +void DwarfDebug::AddSourceLine(DIE *Die, const DISubprogram *SP) { + // If there is no compile unit specified, don't add a line #. + if (SP->getCompileUnit().isNull()) + return; + // If the line number is 0, don't add it. + if (SP->getLineNumber() == 0) + return; + + + unsigned Line = SP->getLineNumber(); + unsigned FileID = FindCompileUnit(SP->getCompileUnit()).getID(); + assert(FileID && "Invalid file id"); + AddUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); + AddUInt(Die, dwarf::DW_AT_decl_line, 0, Line); +} + +/// AddSourceLine - Add location information to specified debug information +/// entry. void DwarfDebug::AddSourceLine(DIE *Die, const DIType *Ty) { // If there is no compile unit specified, don't add a line #. DICompileUnit CU = Ty->getCompileUnit(); @@ -492,6 +544,270 @@ void DwarfDebug::AddSourceLine(DIE *Die, const DIType *Ty) { AddUInt(Die, dwarf::DW_AT_decl_line, 0, Line); } +/* Byref variables, in Blocks, are declared by the programmer as + "SomeType VarName;", but the compiler creates a + __Block_byref_x_VarName struct, and gives the variable VarName + either the struct, or a pointer to the struct, as its type. This + is necessary for various behind-the-scenes things the compiler + needs to do with by-reference variables in blocks. 
+ + However, as far as the original *programmer* is concerned, the + variable should still have type 'SomeType', as originally declared. + + The following function dives into the __Block_byref_x_VarName + struct to find the original type of the variable. This will be + passed back to the code generating the type for the Debug + Information Entry for the variable 'VarName'. 'VarName' will then + have the original type 'SomeType' in its debug information. + + The original type 'SomeType' will be the type of the field named + 'VarName' inside the __Block_byref_x_VarName struct. + + NOTE: In order for this to not completely fail on the debugger + side, the Debug Information Entry for the variable VarName needs to + have a DW_AT_location that tells the debugger how to unwind through + the pointers and __Block_byref_x_VarName struct to find the actual + value of the variable. The function AddBlockByrefType does this. */ + +/// Find the type the programmer originally declared the variable to be +/// and return that type. 
+/// +DIType DwarfDebug::GetBlockByrefType(DIType Ty, std::string Name) { + + DIType subType = Ty; + unsigned tag = Ty.getTag(); + + if (tag == dwarf::DW_TAG_pointer_type) { + DIDerivedType DTy = DIDerivedType(Ty.getNode()); + subType = DTy.getTypeDerivedFrom(); + } + + DICompositeType blockStruct = DICompositeType(subType.getNode()); + + DIArray Elements = blockStruct.getTypeArray(); + + if (Elements.isNull()) + return Ty; + + for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { + DIDescriptor Element = Elements.getElement(i); + DIDerivedType DT = DIDerivedType(Element.getNode()); + if (strcmp(Name.c_str(), DT.getName()) == 0) + return (DT.getTypeDerivedFrom()); + } + + return Ty; +} + +/// AddComplexAddress - Start with the address based on the location provided, +/// and generate the DWARF information necessary to find the actual variable +/// given the extra address information encoded in the DIVariable, starting from +/// the starting location. Add the DWARF information to the die. +/// +void DwarfDebug::AddComplexAddress(DbgVariable *&DV, DIE *Die, + unsigned Attribute, + const MachineLocation &Location) { + const DIVariable &VD = DV->getVariable(); + DIType Ty = VD.getType(); + + // Decode the original location, and use that as the start of the byref + // variable's location. 
+ unsigned Reg = RI->getDwarfRegNum(Location.getReg(), false); + DIEBlock *Block = new DIEBlock(); + + if (Location.isReg()) { + if (Reg < 32) { + AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + Reg); + } else { + Reg = Reg - dwarf::DW_OP_reg0; + AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg); + AddUInt(Block, 0, dwarf::DW_FORM_udata, Reg); + } + } else { + if (Reg < 32) + AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg); + else { + AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx); + AddUInt(Block, 0, dwarf::DW_FORM_udata, Reg); + } + + AddUInt(Block, 0, dwarf::DW_FORM_sdata, Location.getOffset()); + } + + for (unsigned i = 0, N = VD.getNumAddrElements(); i < N; ++i) { + uint64_t Element = VD.getAddrElement(i); + + if (Element == DIFactory::OpPlus) { + AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); + AddUInt(Block, 0, dwarf::DW_FORM_udata, VD.getAddrElement(++i)); + } else if (Element == DIFactory::OpDeref) { + AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + } else llvm_unreachable("unknown DIFactory Opcode"); + } + + // Now attach the location information to the DIE. + AddBlock(Die, Attribute, 0, Block); +} + +/* Byref variables, in Blocks, are declared by the programmer as "SomeType + VarName;", but the compiler creates a __Block_byref_x_VarName struct, and + gives the variable VarName either the struct, or a pointer to the struct, as + its type. This is necessary for various behind-the-scenes things the + compiler needs to do with by-reference variables in Blocks. + + However, as far as the original *programmer* is concerned, the variable + should still have type 'SomeType', as originally declared. + + The function GetBlockByrefType dives into the __Block_byref_x_VarName + struct to find the original type of the variable, which is then assigned to + the variable's Debug Information Entry as its real type. So far, so good. 
+ However now the debugger will expect the variable VarName to have the type + SomeType. So we need the location attribute for the variable to be an + expression that explains to the debugger how to navigate through the + pointers and struct to find the actual variable of type SomeType. + + The following function does just that. We start by getting + the "normal" location for the variable. This will be the location + of either the struct __Block_byref_x_VarName or the pointer to the + struct __Block_byref_x_VarName. + + The struct will look something like: + + struct __Block_byref_x_VarName { + ... <various fields> + struct __Block_byref_x_VarName *forwarding; + ... <various other fields> + SomeType VarName; + ... <maybe more fields> + }; + + If we are given the struct directly (as our starting point) we + need to tell the debugger to: + + 1). Add the offset of the forwarding field. + + 2). Follow that pointer to get the the real __Block_byref_x_VarName + struct to use (the real one may have been copied onto the heap). + + 3). Add the offset for the field VarName, to find the actual variable. + + If we started with a pointer to the struct, then we need to + dereference that pointer first, before the other steps. + Translating this into DWARF ops, we will need to append the following + to the current location description for the variable: + + DW_OP_deref -- optional, if we start with a pointer + DW_OP_plus_uconst <forward_fld_offset> + DW_OP_deref + DW_OP_plus_uconst <varName_fld_offset> + + That is what this function does. */ + +/// AddBlockByrefAddress - Start with the address based on the location +/// provided, and generate the DWARF information necessary to find the +/// actual Block variable (navigating the Block struct) based on the +/// starting location. Add the DWARF information to the die. For +/// more information, read large comment just above here. 
+/// +void DwarfDebug::AddBlockByrefAddress(DbgVariable *&DV, DIE *Die, + unsigned Attribute, + const MachineLocation &Location) { + const DIVariable &VD = DV->getVariable(); + DIType Ty = VD.getType(); + DIType TmpTy = Ty; + unsigned Tag = Ty.getTag(); + bool isPointer = false; + + const char *varName = VD.getName(); + + if (Tag == dwarf::DW_TAG_pointer_type) { + DIDerivedType DTy = DIDerivedType(Ty.getNode()); + TmpTy = DTy.getTypeDerivedFrom(); + isPointer = true; + } + + DICompositeType blockStruct = DICompositeType(TmpTy.getNode()); + + // Find the __forwarding field and the variable field in the __Block_byref + // struct. + DIArray Fields = blockStruct.getTypeArray(); + DIDescriptor varField = DIDescriptor(); + DIDescriptor forwardingField = DIDescriptor(); + + + for (unsigned i = 0, N = Fields.getNumElements(); i < N; ++i) { + DIDescriptor Element = Fields.getElement(i); + DIDerivedType DT = DIDerivedType(Element.getNode()); + const char *fieldName = DT.getName(); + if (strcmp(fieldName, "__forwarding") == 0) + forwardingField = Element; + else if (strcmp(fieldName, varName) == 0) + varField = Element; + } + + assert(!varField.isNull() && "Can't find byref variable in Block struct"); + assert(!forwardingField.isNull() + && "Can't find forwarding field in Block struct"); + + // Get the offsets for the forwarding field and the variable field. + unsigned int forwardingFieldOffset = + DIDerivedType(forwardingField.getNode()).getOffsetInBits() >> 3; + unsigned int varFieldOffset = + DIDerivedType(varField.getNode()).getOffsetInBits() >> 3; + + // Decode the original location, and use that as the start of the byref + // variable's location. 
+ unsigned Reg = RI->getDwarfRegNum(Location.getReg(), false); + DIEBlock *Block = new DIEBlock(); + + if (Location.isReg()) { + if (Reg < 32) + AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + Reg); + else { + Reg = Reg - dwarf::DW_OP_reg0; + AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg); + AddUInt(Block, 0, dwarf::DW_FORM_udata, Reg); + } + } else { + if (Reg < 32) + AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg); + else { + AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx); + AddUInt(Block, 0, dwarf::DW_FORM_udata, Reg); + } + + AddUInt(Block, 0, dwarf::DW_FORM_sdata, Location.getOffset()); + } + + // If we started with a pointer to the __Block_byref... struct, then + // the first thing we need to do is dereference the pointer (DW_OP_deref). + if (isPointer) + AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + + // Next add the offset for the '__forwarding' field: + // DW_OP_plus_uconst ForwardingFieldOffset. Note there's no point in + // adding the offset if it's 0. + if (forwardingFieldOffset > 0) { + AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); + AddUInt(Block, 0, dwarf::DW_FORM_udata, forwardingFieldOffset); + } + + // Now dereference the __forwarding field to get to the real __Block_byref + // struct: DW_OP_deref. + AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + + // Now that we've got the real __Block_byref... struct, add the offset + // for the variable's field to get to the location of the actual variable: + // DW_OP_plus_uconst varFieldOffset. Again, don't add if it's 0. + if (varFieldOffset > 0) { + AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); + AddUInt(Block, 0, dwarf::DW_FORM_udata, varFieldOffset); + } + + // Now attach the location information to the DIE. + AddBlock(Die, Attribute, 0, Block); +} + /// AddAddress - Add an address attribute to a die based on the location /// provided. 
void DwarfDebug::AddAddress(DIE *Die, unsigned Attribute, @@ -526,7 +842,7 @@ void DwarfDebug::AddType(CompileUnit *DW_Unit, DIE *Entity, DIType Ty) { return; // Check for pre-existence. - DIEEntry *&Slot = DW_Unit->getDIEEntrySlotFor(Ty.getGV()); + DIEEntry *&Slot = DW_Unit->getDIEEntrySlotFor(Ty.getNode()); // If it exists then use the existing value. if (Slot) { @@ -539,20 +855,20 @@ void DwarfDebug::AddType(CompileUnit *DW_Unit, DIE *Entity, DIType Ty) { // Construct type. DIE Buffer(dwarf::DW_TAG_base_type); - if (Ty.isBasicType(Ty.getTag())) - ConstructTypeDIE(DW_Unit, Buffer, DIBasicType(Ty.getGV())); - else if (Ty.isDerivedType(Ty.getTag())) - ConstructTypeDIE(DW_Unit, Buffer, DIDerivedType(Ty.getGV())); + if (Ty.isBasicType()) + ConstructTypeDIE(DW_Unit, Buffer, DIBasicType(Ty.getNode())); + else if (Ty.isCompositeType()) + ConstructTypeDIE(DW_Unit, Buffer, DICompositeType(Ty.getNode())); else { - assert(Ty.isCompositeType(Ty.getTag()) && "Unknown kind of DIType"); - ConstructTypeDIE(DW_Unit, Buffer, DICompositeType(Ty.getGV())); + assert(Ty.isDerivedType() && "Unknown kind of DIType"); + ConstructTypeDIE(DW_Unit, Buffer, DIDerivedType(Ty.getNode())); } // Add debug information entry to entity and appropriate context. DIE *Die = NULL; DIDescriptor Context = Ty.getContext(); if (!Context.isNull()) - Die = DW_Unit->getDieMapSlotFor(Context.getGV()); + Die = DW_Unit->getDieMapSlotFor(Context.getNode()); if (Die) { DIE *Child = new DIE(Buffer); @@ -571,14 +887,13 @@ void DwarfDebug::AddType(CompileUnit *DW_Unit, DIE *Entity, DIType Ty) { void DwarfDebug::ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, DIBasicType BTy) { // Get core information. - std::string Name; - BTy.getName(Name); + const char *Name = BTy.getName(); Buffer.setTag(dwarf::DW_TAG_base_type); AddUInt(&Buffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, BTy.getEncoding()); // Add name if not anonymous or intermediate type. 
- if (!Name.empty()) + if (Name) AddString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); uint64_t Size = BTy.getSizeInBits() >> 3; AddUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size); @@ -588,8 +903,7 @@ void DwarfDebug::ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, void DwarfDebug::ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, DIDerivedType DTy) { // Get core information. - std::string Name; - DTy.getName(Name); + const char *Name = DTy.getName(); uint64_t Size = DTy.getSizeInBits() >> 3; unsigned Tag = DTy.getTag(); @@ -603,7 +917,7 @@ void DwarfDebug::ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, AddType(DW_Unit, &Buffer, FromTy); // Add name if not anonymous or intermediate type. - if (!Name.empty()) + if (Name) AddString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); // Add size if non-zero (derived types might be zero-sized.) @@ -619,8 +933,7 @@ void DwarfDebug::ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, void DwarfDebug::ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, DICompositeType CTy) { // Get core information. - std::string Name; - CTy.getName(Name); + const char *Name = CTy.getName(); uint64_t Size = CTy.getSizeInBits() >> 3; unsigned Tag = CTy.getTag(); @@ -637,9 +950,11 @@ void DwarfDebug::ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, // Add enumerators to enumeration type. for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { DIE *ElemDie = NULL; - DIEnumerator Enum(Elements.getElement(i).getGV()); - ElemDie = ConstructEnumTypeDIE(DW_Unit, &Enum); - Buffer.AddChild(ElemDie); + DIEnumerator Enum(Elements.getElement(i).getNode()); + if (!Enum.isNull()) { + ElemDie = ConstructEnumTypeDIE(DW_Unit, &Enum); + Buffer.AddChild(ElemDie); + } } } break; @@ -647,7 +962,7 @@ void DwarfDebug::ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, // Add return type. 
DIArray Elements = CTy.getTypeArray(); DIDescriptor RTy = Elements.getElement(0); - AddType(DW_Unit, &Buffer, DIType(RTy.getGV())); + AddType(DW_Unit, &Buffer, DIType(RTy.getNode())); // Add prototype flag. AddUInt(&Buffer, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1); @@ -656,7 +971,7 @@ void DwarfDebug::ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, for (unsigned i = 1, N = Elements.getNumElements(); i < N; ++i) { DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter); DIDescriptor Ty = Elements.getElement(i); - AddType(DW_Unit, Arg, DIType(Ty.getGV())); + AddType(DW_Unit, Arg, DIType(Ty.getNode())); Buffer.AddChild(Arg); } } @@ -674,20 +989,19 @@ void DwarfDebug::ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, // Add elements to structure type. for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { DIDescriptor Element = Elements.getElement(i); + if (Element.isNull()) + continue; DIE *ElemDie = NULL; if (Element.getTag() == dwarf::DW_TAG_subprogram) ElemDie = CreateSubprogramDIE(DW_Unit, - DISubprogram(Element.getGV())); + DISubprogram(Element.getNode())); else ElemDie = CreateMemberDIE(DW_Unit, - DIDerivedType(Element.getGV())); + DIDerivedType(Element.getNode())); Buffer.AddChild(ElemDie); } - // FIXME: We'd like an API to register additional attributes for the - // frontend to use while synthesizing, and then we'd use that api in clang - // instead of this. - if (Name == "__block_literal_generic") + if (CTy.isAppleBlockExtension()) AddUInt(&Buffer, dwarf::DW_AT_APPLE_block, dwarf::DW_FORM_flag, 1); unsigned RLang = CTy.getRunTimeLang(); @@ -701,7 +1015,7 @@ void DwarfDebug::ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, } // Add name if not anonymous or intermediate type. 
- if (!Name.empty()) + if (Name) AddString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); if (Tag == dwarf::DW_TAG_enumeration_type || @@ -729,12 +1043,11 @@ void DwarfDebug::ConstructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy){ int64_t H = SR.getHi(); DIE *DW_Subrange = new DIE(dwarf::DW_TAG_subrange_type); - if (L != H) { - AddDIEEntry(DW_Subrange, dwarf::DW_AT_type, dwarf::DW_FORM_ref4, IndexTy); - if (L) - AddSInt(DW_Subrange, dwarf::DW_AT_lower_bound, 0, L); + AddDIEEntry(DW_Subrange, dwarf::DW_AT_type, dwarf::DW_FORM_ref4, IndexTy); + if (L) + AddSInt(DW_Subrange, dwarf::DW_AT_lower_bound, 0, L); + if (H) AddSInt(DW_Subrange, dwarf::DW_AT_upper_bound, 0, H); - } Buffer.AddChild(DW_Subrange); } @@ -761,15 +1074,14 @@ void DwarfDebug::ConstructArrayTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { DIDescriptor Element = Elements.getElement(i); if (Element.getTag() == dwarf::DW_TAG_subrange_type) - ConstructSubrangeDIE(Buffer, DISubrange(Element.getGV()), IndexTy); + ConstructSubrangeDIE(Buffer, DISubrange(Element.getNode()), IndexTy); } } /// ConstructEnumTypeDIE - Construct enum type DIE from DIEnumerator. 
DIE *DwarfDebug::ConstructEnumTypeDIE(CompileUnit *DW_Unit, DIEnumerator *ETy) { DIE *Enumerator = new DIE(dwarf::DW_TAG_enumerator); - std::string Name; - ETy->getName(Name); + const char *Name = ETy->getName(); AddString(Enumerator, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); int64_t Value = ETy->getEnumValue(); AddSInt(Enumerator, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, Value); @@ -780,27 +1092,39 @@ DIE *DwarfDebug::ConstructEnumTypeDIE(CompileUnit *DW_Unit, DIEnumerator *ETy) { DIE *DwarfDebug::CreateGlobalVariableDIE(CompileUnit *DW_Unit, const DIGlobalVariable &GV) { DIE *GVDie = new DIE(dwarf::DW_TAG_variable); - std::string Name; - GV.getDisplayName(Name); - AddString(GVDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); - std::string LinkageName; - GV.getLinkageName(LinkageName); - if (!LinkageName.empty()) + AddString(GVDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, + GV.getDisplayName()); + + const char *LinkageName = GV.getLinkageName(); + if (LinkageName) { + // Skip special LLVM prefix that is used to inform the asm printer to not + // emit usual symbol prefix before the symbol name. This happens for + // Objective-C symbol names and symbol whose name is replaced using GCC's + // __asm__ attribute. + if (LinkageName[0] == 1) + LinkageName = &LinkageName[1]; AddString(GVDie, dwarf::DW_AT_MIPS_linkage_name, dwarf::DW_FORM_string, LinkageName); + } AddType(DW_Unit, GVDie, GV.getType()); if (!GV.isLocalToUnit()) AddUInt(GVDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); AddSourceLine(GVDie, &GV); + + // Add address. + DIEBlock *Block = new DIEBlock(); + AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr); + AddObjectLabel(Block, 0, dwarf::DW_FORM_udata, + Asm->Mang->getMangledName(GV.getGlobal())); + AddBlock(GVDie, dwarf::DW_AT_location, 0, Block); + return GVDie; } /// CreateMemberDIE - Create new member DIE. 
DIE *DwarfDebug::CreateMemberDIE(CompileUnit *DW_Unit, const DIDerivedType &DT){ DIE *MemberDie = new DIE(DT.getTag()); - std::string Name; - DT.getName(Name); - if (!Name.empty()) + if (const char *Name = DT.getName()) AddString(MemberDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); AddType(DW_Unit, MemberDie, DT.getTypeDerivedFrom()); @@ -849,17 +1173,19 @@ DIE *DwarfDebug::CreateSubprogramDIE(CompileUnit *DW_Unit, bool IsInlined) { DIE *SPDie = new DIE(dwarf::DW_TAG_subprogram); - std::string Name; - SP.getName(Name); + const char * Name = SP.getName(); AddString(SPDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); - std::string LinkageName; - SP.getLinkageName(LinkageName); - - if (!LinkageName.empty()) + const char *LinkageName = SP.getLinkageName(); + if (LinkageName) { + // Skip special LLVM prefix that is used to inform the asm printer to not emit + // usual symbol prefix before the symbol name. This happens for Objective-C + // symbol names and symbol whose name is replaced using GCC's __asm__ attribute. 
+ if (LinkageName[0] == 1) + LinkageName = &LinkageName[1]; AddString(SPDie, dwarf::DW_AT_MIPS_linkage_name, dwarf::DW_FORM_string, LinkageName); - + } AddSourceLine(SPDie, &SP); DICompositeType SPTy = SP.getType(); @@ -877,7 +1203,7 @@ DIE *DwarfDebug::CreateSubprogramDIE(CompileUnit *DW_Unit, if (Args.isNull() || SPTag != dwarf::DW_TAG_subroutine_type) AddType(DW_Unit, SPDie, SPTy); else - AddType(DW_Unit, SPDie, DIType(Args.getElement(0).getGV())); + AddType(DW_Unit, SPDie, DIType(Args.getElement(0).getNode())); } if (!SP.isDefinition()) { @@ -888,7 +1214,7 @@ DIE *DwarfDebug::CreateSubprogramDIE(CompileUnit *DW_Unit, if (SPTag == dwarf::DW_TAG_subroutine_type) for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) { DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter); - AddType(DW_Unit, Arg, DIType(Args.getElement(i).getGV())); + AddType(DW_Unit, Arg, DIType(Args.getElement(i).getNode())); AddUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); // ?? SPDie->AddChild(Arg); } @@ -898,7 +1224,7 @@ DIE *DwarfDebug::CreateSubprogramDIE(CompileUnit *DW_Unit, AddUInt(SPDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); // DW_TAG_inlined_subroutine may refer to this DIE. - DIE *&Slot = DW_Unit->getDieMapSlotFor(SP.getGV()); + DIE *&Slot = DW_Unit->getDieMapSlotFor(SP.getNode()); Slot = SPDie; return SPDie; } @@ -907,7 +1233,7 @@ DIE *DwarfDebug::CreateSubprogramDIE(CompileUnit *DW_Unit, /// CompileUnit &DwarfDebug::FindCompileUnit(DICompileUnit Unit) const { DenseMap<Value *, CompileUnit *>::const_iterator I = - CompileUnitMap.find(Unit.getGV()); + CompileUnitMap.find(Unit.getNode()); assert(I != CompileUnitMap.end() && "Missing compile unit."); return *I->second; } @@ -935,15 +1261,18 @@ DIE *DwarfDebug::CreateDbgScopeVariable(DbgVariable *DV, CompileUnit *Unit) { // Define variable debug information entry. 
DIE *VariableDie = new DIE(Tag); - std::string Name; - VD.getName(Name); + const char *Name = VD.getName(); AddString(VariableDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); // Add source line info if available. AddSourceLine(VariableDie, &VD); // Add variable type. - AddType(Unit, VariableDie, VD.getType()); + // FIXME: isBlockByrefVariable should be reformulated in terms of complex addresses instead. + if (VD.isBlockByrefVariable()) + AddType(Unit, VariableDie, GetBlockByrefType(VD.getType(), Name)); + else + AddType(Unit, VariableDie, VD.getType()); // Add variable address. if (!DV->isInlinedFnVar()) { @@ -952,7 +1281,14 @@ DIE *DwarfDebug::CreateDbgScopeVariable(DbgVariable *DV, CompileUnit *Unit) { MachineLocation Location; Location.set(RI->getFrameRegister(*MF), RI->getFrameIndexOffset(*MF, DV->getFrameIndex())); - AddAddress(VariableDie, dwarf::DW_AT_location, Location); + + + if (VD.hasComplexAddress()) + AddComplexAddress(DV, VariableDie, dwarf::DW_AT_location, Location); + else if (VD.isBlockByrefVariable()) + AddBlockByrefAddress(DV, VariableDie, dwarf::DW_AT_location, Location); + else + AddAddress(VariableDie, dwarf::DW_AT_location, Location); } return VariableDie; @@ -960,26 +1296,64 @@ DIE *DwarfDebug::CreateDbgScopeVariable(DbgVariable *DV, CompileUnit *Unit) { /// getOrCreateScope - Returns the scope associated with the given descriptor. 
/// -DbgScope *DwarfDebug::getOrCreateScope(GlobalVariable *V) { - DbgScope *&Slot = DbgScopeMap[V]; +DbgScope *DwarfDebug::getDbgScope(MDNode *N, const MachineInstr *MI) { + DbgScope *&Slot = DbgScopeMap[N]; + if (Slot) return Slot; + + DbgScope *Parent = NULL; + + DIDescriptor Scope(N); + if (Scope.isCompileUnit()) { + return NULL; + } else if (Scope.isSubprogram()) { + DISubprogram SP(N); + DIDescriptor ParentDesc = SP.getContext(); + if (!ParentDesc.isNull() && !ParentDesc.isCompileUnit()) + Parent = getDbgScope(ParentDesc.getNode(), MI); + } else if (Scope.isLexicalBlock()) { + DILexicalBlock DB(N); + DIDescriptor ParentDesc = DB.getContext(); + if (!ParentDesc.isNull()) + Parent = getDbgScope(ParentDesc.getNode(), MI); + } else + assert (0 && "Unexpected scope info"); + + Slot = new DbgScope(Parent, DIDescriptor(N)); + Slot->setFirstInsn(MI); + + if (Parent) + Parent->AddScope(Slot); + else + // First function is top level function. + if (!FunctionDbgScope) + FunctionDbgScope = Slot; + + return Slot; +} + + +/// getOrCreateScope - Returns the scope associated with the given descriptor. +/// FIXME - Remove this method. +DbgScope *DwarfDebug::getOrCreateScope(MDNode *N) { + DbgScope *&Slot = DbgScopeMap[N]; if (Slot) return Slot; DbgScope *Parent = NULL; - DIBlock Block(V); + DILexicalBlock Block(N); // Don't create a new scope if we already created one for an inlined function. - DenseMap<const GlobalVariable *, DbgScope *>::iterator - II = AbstractInstanceRootMap.find(V); + DenseMap<const MDNode *, DbgScope *>::iterator + II = AbstractInstanceRootMap.find(N); if (II != AbstractInstanceRootMap.end()) return LexicalScopeStack.back(); if (!Block.isNull()) { DIDescriptor ParentDesc = Block.getContext(); Parent = - ParentDesc.isNull() ? NULL : getOrCreateScope(ParentDesc.getGV()); + ParentDesc.isNull() ? 
NULL : getOrCreateScope(ParentDesc.getNode()); } - Slot = new DbgScope(Parent, DIDescriptor(V)); + Slot = new DbgScope(Parent, DIDescriptor(N)); if (Parent) Parent->AddScope(Slot); @@ -1088,10 +1462,14 @@ void DwarfDebug::ConstructFunctionDbgScope(DbgScope *RootScope, return; // Get the subprogram debug information entry. - DISubprogram SPD(Desc.getGV()); + DISubprogram SPD(Desc.getNode()); // Get the subprogram die. - DIE *SPDie = ModuleCU->getDieMapSlotFor(SPD.getGV()); + DIE *SPDie = ModuleCU->getDieMapSlotFor(SPD.getNode()); + if (!SPDie) { + ConstructSubprogram(SPD.getNode()); + SPDie = ModuleCU->getDieMapSlotFor(SPD.getNode()); + } assert(SPDie && "Missing subprogram descriptor"); if (!AbstractScope) { @@ -1105,23 +1483,33 @@ void DwarfDebug::ConstructFunctionDbgScope(DbgScope *RootScope, } ConstructDbgScope(RootScope, 0, 0, SPDie, ModuleCU); + // If there are global variables at this scope then add their dies. + for (SmallVector<WeakVH, 4>::iterator SGI = ScopedGVs.begin(), + SGE = ScopedGVs.end(); SGI != SGE; ++SGI) { + MDNode *N = dyn_cast_or_null<MDNode>(*SGI); + if (!N) continue; + DIGlobalVariable GV(N); + if (GV.getContext().getNode() == RootScope->getDesc().getNode()) { + DIE *ScopedGVDie = CreateGlobalVariableDIE(ModuleCU, GV); + SPDie->AddChild(ScopedGVDie); + } + } } /// ConstructDefaultDbgScope - Construct a default scope for the subprogram. /// void DwarfDebug::ConstructDefaultDbgScope(MachineFunction *MF) { - const char *FnName = MF->getFunction()->getNameStart(); StringMap<DIE*> &Globals = ModuleCU->getGlobals(); - StringMap<DIE*>::iterator GI = Globals.find(FnName); + StringMap<DIE*>::iterator GI = Globals.find(MF->getFunction()->getName()); if (GI != Globals.end()) { DIE *SPDie = GI->second; - + // Add the function bounds. 
AddLabel(SPDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, DWLabel("func_begin", SubprogramCount)); AddLabel(SPDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, DWLabel("func_end", SubprogramCount)); - + MachineLocation Location(RI->getFrameRegister(*MF)); AddAddress(SPDie, dwarf::DW_AT_frame_base, Location); } @@ -1131,8 +1519,8 @@ void DwarfDebug::ConstructDefaultDbgScope(MachineFunction *MF) { /// source file names. If none currently exists, create a new id and insert it /// in the SourceIds map. This can update DirectoryNames and SourceFileNames /// maps as well. -unsigned DwarfDebug::GetOrCreateSourceID(const std::string &DirName, - const std::string &FileName) { +unsigned DwarfDebug::GetOrCreateSourceID(const char *DirName, + const char *FileName) { unsigned DId; StringMap<unsigned>::iterator DI = DirectoryIdMap.find(DirName); if (DI != DirectoryIdMap.end()) { @@ -1165,30 +1553,28 @@ unsigned DwarfDebug::GetOrCreateSourceID(const std::string &DirName, return SrcId; } -void DwarfDebug::ConstructCompileUnit(GlobalVariable *GV) { - DICompileUnit DIUnit(GV); - std::string Dir, FN, Prod; - unsigned ID = GetOrCreateSourceID(DIUnit.getDirectory(Dir), - DIUnit.getFilename(FN)); +void DwarfDebug::ConstructCompileUnit(MDNode *N) { + DICompileUnit DIUnit(N); + const char *FN = DIUnit.getFilename(); + const char *Dir = DIUnit.getDirectory(); + unsigned ID = GetOrCreateSourceID(Dir, FN); DIE *Die = new DIE(dwarf::DW_TAG_compile_unit); AddSectionOffset(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, DWLabel("section_line", 0), DWLabel("section_line", 0), false); AddString(Die, dwarf::DW_AT_producer, dwarf::DW_FORM_string, - DIUnit.getProducer(Prod)); + DIUnit.getProducer()); AddUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data1, DIUnit.getLanguage()); AddString(Die, dwarf::DW_AT_name, dwarf::DW_FORM_string, FN); - if (!Dir.empty()) + if (Dir) AddString(Die, dwarf::DW_AT_comp_dir, dwarf::DW_FORM_string, Dir); if (DIUnit.isOptimized()) AddUInt(Die, 
dwarf::DW_AT_APPLE_optimized, dwarf::DW_FORM_flag, 1); - std::string Flags; - DIUnit.getFlags(Flags); - if (!Flags.empty()) + if (const char *Flags = DIUnit.getFlags()) AddString(Die, dwarf::DW_AT_APPLE_flags, dwarf::DW_FORM_string, Flags); unsigned RVer = DIUnit.getRunTimeVersion(); @@ -1203,28 +1589,24 @@ void DwarfDebug::ConstructCompileUnit(GlobalVariable *GV) { ModuleCU = Unit; } - CompileUnitMap[DIUnit.getGV()] = Unit; + CompileUnitMap[DIUnit.getNode()] = Unit; CompileUnits.push_back(Unit); } -void DwarfDebug::ConstructGlobalVariableDIE(GlobalVariable *GV) { - DIGlobalVariable DI_GV(GV); +void DwarfDebug::ConstructGlobalVariableDIE(MDNode *N) { + DIGlobalVariable DI_GV(N); + + // If debug information is malformed then ignore it. + if (DI_GV.Verify() == false) + return; // Check for pre-existence. - DIE *&Slot = ModuleCU->getDieMapSlotFor(DI_GV.getGV()); + DIE *&Slot = ModuleCU->getDieMapSlotFor(DI_GV.getNode()); if (Slot) return; DIE *VariableDie = CreateGlobalVariableDIE(ModuleCU, DI_GV); - // Add address. - DIEBlock *Block = new DIEBlock(); - AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr); - std::string GLN; - AddObjectLabel(Block, 0, dwarf::DW_FORM_udata, - Asm->getGlobalLinkName(DI_GV.getGlobal(), GLN)); - AddBlock(VariableDie, dwarf::DW_AT_location, 0, Block); - // Add to map. Slot = VariableDie; @@ -1232,16 +1614,15 @@ void DwarfDebug::ConstructGlobalVariableDIE(GlobalVariable *GV) { ModuleCU->getDie()->AddChild(VariableDie); // Expose as global. FIXME - need to check external flag. - std::string Name; - ModuleCU->AddGlobal(DI_GV.getName(Name), VariableDie); + ModuleCU->AddGlobal(DI_GV.getName(), VariableDie); return; } -void DwarfDebug::ConstructSubprogram(GlobalVariable *GV) { - DISubprogram SP(GV); +void DwarfDebug::ConstructSubprogram(MDNode *N) { + DISubprogram SP(N); // Check for pre-existence. 
- DIE *&Slot = ModuleCU->getDieMapSlotFor(GV); + DIE *&Slot = ModuleCU->getDieMapSlotFor(N); if (Slot) return; @@ -1259,28 +1640,25 @@ void DwarfDebug::ConstructSubprogram(GlobalVariable *GV) { ModuleCU->getDie()->AddChild(SubprogramDie); // Expose as global. - std::string Name; - ModuleCU->AddGlobal(SP.getName(Name), SubprogramDie); + ModuleCU->AddGlobal(SP.getName(), SubprogramDie); return; } - /// BeginModule - Emit all Dwarf sections that should come prior to the - /// content. Create global DIEs and emit initial debug info sections. - /// This is inovked by the target AsmPrinter. +/// BeginModule - Emit all Dwarf sections that should come prior to the +/// content. Create global DIEs and emit initial debug info sections. +/// This is inovked by the target AsmPrinter. void DwarfDebug::BeginModule(Module *M, MachineModuleInfo *mmi) { this->M = M; if (TimePassesIsEnabled) DebugTimer->startTimer(); - SmallVector<GlobalVariable *, 2> CUs; - SmallVector<GlobalVariable *, 4> GVs; - SmallVector<GlobalVariable *, 4> SPs; - CollectDebugInfoAnchors(*M, CUs, GVs, SPs); + DebugInfoFinder DbgFinder; + DbgFinder.processModule(*M); // Create all the compile unit DIEs. - for (SmallVector<GlobalVariable *, 2>::iterator I = CUs.begin(), - E = CUs.end(); I != E; ++I) + for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(), + E = DbgFinder.compile_unit_end(); I != E; ++I) ConstructCompileUnit(*I); if (CompileUnits.empty()) { @@ -1295,23 +1673,19 @@ void DwarfDebug::BeginModule(Module *M, MachineModuleInfo *mmi) { if (!ModuleCU) ModuleCU = CompileUnits[0]; - // If there is not any debug info available for any global variables and any - // subprograms then there is not any debug info to emit. - if (GVs.empty() && SPs.empty()) { - if (TimePassesIsEnabled) - DebugTimer->stopTimer(); - - return; - } - // Create DIEs for each of the externally visible global variables. 
- for (SmallVector<GlobalVariable *, 4>::iterator I = GVs.begin(), - E = GVs.end(); I != E; ++I) - ConstructGlobalVariableDIE(*I); + for (DebugInfoFinder::iterator I = DbgFinder.global_variable_begin(), + E = DbgFinder.global_variable_end(); I != E; ++I) { + DIGlobalVariable GV(*I); + if (GV.getContext().getNode() != GV.getCompileUnit().getNode()) + ScopedGVs.push_back(*I); + else + ConstructGlobalVariableDIE(*I); + } // Create DIEs for each of the externally visible subprograms. - for (SmallVector<GlobalVariable *, 4>::iterator I = SPs.begin(), - E = SPs.end(); I != E; ++I) + for (DebugInfoFinder::iterator I = DbgFinder.subprogram_begin(), + E = DbgFinder.subprogram_end(); I != E; ++I) ConstructSubprogram(*I); MMI = mmi; @@ -1319,11 +1693,11 @@ void DwarfDebug::BeginModule(Module *M, MachineModuleInfo *mmi) { MMI->setDebugInfoAvailability(true); // Prime section data. - SectionMap.insert(TAI->getTextSection()); + SectionMap.insert(Asm->getObjFileLowering().getTextSection()); // Print out .file directives to specify files for .loc directives. These are // printed out early so that they precede any .loc directives. - if (TAI->hasDotLocAndDotFile()) { + if (MAI->hasDotLocAndDotFile()) { for (unsigned i = 1, e = getNumSourceIds()+1; i != e; ++i) { // Remember source id starts at 1. std::pair<unsigned, unsigned> Id = getSourceDirectoryAndFileIds(i); @@ -1332,7 +1706,7 @@ void DwarfDebug::BeginModule(Module *M, MachineModuleInfo *mmi) { FullPath.appendComponent(getSourceFileName(Id.second)); assert(AppendOk && "Could not append filename to directory!"); AppendOk = false; - Asm->EmitFile(i, FullPath.toString()); + Asm->EmitFile(i, FullPath.str()); Asm->EOL(); } } @@ -1347,21 +1721,21 @@ void DwarfDebug::BeginModule(Module *M, MachineModuleInfo *mmi) { /// EndModule - Emit all Dwarf sections that should come after the content. 
/// void DwarfDebug::EndModule() { - if (!ShouldEmitDwarfDebug()) + if (!ModuleCU) return; if (TimePassesIsEnabled) DebugTimer->startTimer(); // Standard sections final addresses. - Asm->SwitchToSection(TAI->getTextSection()); + Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering().getTextSection()); EmitLabel("text_end", 0); - Asm->SwitchToSection(TAI->getDataSection()); + Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering().getDataSection()); EmitLabel("data_end", 0); // End text sections. for (unsigned i = 1, N = SectionMap.size(); i <= N; ++i) { - Asm->SwitchToSection(SectionMap[i]); + Asm->OutStreamer.SwitchSection(SectionMap[i]); EmitLabel("section_end", i); } @@ -1410,6 +1784,135 @@ void DwarfDebug::EndModule() { DebugTimer->stopTimer(); } +/// CollectVariableInfo - Populate DbgScope entries with variables' info. +void DwarfDebug::CollectVariableInfo() { + if (!MMI) return; + MachineModuleInfo::VariableDbgInfoMapTy &VMap = MMI->getVariableDbgInfo(); + for (MachineModuleInfo::VariableDbgInfoMapTy::iterator VI = VMap.begin(), + VE = VMap.end(); VI != VE; ++VI) { + MetadataBase *MB = VI->first; + MDNode *Var = dyn_cast_or_null<MDNode>(MB); + DIVariable DV (Var); + if (DV.isNull()) continue; + unsigned VSlot = VI->second; + DbgScope *Scope = getDbgScope(DV.getContext().getNode(), NULL); + Scope->AddVariable(new DbgVariable(DV, VSlot, false)); + } +} + +/// SetDbgScopeBeginLabels - Update DbgScope begin labels for the scopes that +/// start with this machine instruction. +void DwarfDebug::SetDbgScopeBeginLabels(const MachineInstr *MI, unsigned Label) { + InsnToDbgScopeMapTy::iterator I = DbgScopeBeginMap.find(MI); + if (I == DbgScopeBeginMap.end()) + return; + SmallVector<DbgScope *, 2> &SD = I->second; + for (SmallVector<DbgScope *, 2>::iterator SDI = SD.begin(), SDE = SD.end(); + SDI != SDE; ++SDI) + (*SDI)->setStartLabelID(Label); +} + +/// SetDbgScopeEndLabels - Update DbgScope end labels for the scopes that +/// end with this machine instruction. 
+void DwarfDebug::SetDbgScopeEndLabels(const MachineInstr *MI, unsigned Label) { + InsnToDbgScopeMapTy::iterator I = DbgScopeEndMap.find(MI); + if (I == DbgScopeEndMap.end()) + return; + SmallVector<DbgScope *, 2> &SD = I->second; + for (SmallVector<DbgScope *, 2>::iterator SDI = SD.begin(), SDE = SD.end(); + SDI != SDE; ++SDI) + (*SDI)->setEndLabelID(Label); +} + +/// ExtractScopeInformation - Scan machine instructions in this function +/// and collect DbgScopes. Return true, if atleast one scope was found. +bool DwarfDebug::ExtractScopeInformation(MachineFunction *MF) { + // If scope information was extracted using .dbg intrinsics then there is not + // any need to extract these information by scanning each instruction. + if (!DbgScopeMap.empty()) + return false; + + // Scan each instruction and create scopes. + for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); + I != E; ++I) { + for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end(); + II != IE; ++II) { + const MachineInstr *MInsn = II; + DebugLoc DL = MInsn->getDebugLoc(); + if (DL.isUnknown()) + continue; + DebugLocTuple DLT = MF->getDebugLocTuple(DL); + if (!DLT.Scope) + continue; + // There is no need to create another DIE for compile unit. For all + // other scopes, create one DbgScope now. This will be translated + // into a scope DIE at the end. + DIDescriptor D(DLT.Scope); + if (!D.isCompileUnit()) { + DbgScope *Scope = getDbgScope(DLT.Scope, MInsn); + Scope->setLastInsn(MInsn); + } + } + } + + // If a scope's last instruction is not set then use its child scope's + // last instruction as this scope's last instrunction. 
+ for (DenseMap<MDNode *, DbgScope *>::iterator DI = DbgScopeMap.begin(), + DE = DbgScopeMap.end(); DI != DE; ++DI) { + assert (DI->second->getFirstInsn() && "Invalid first instruction!"); + DI->second->FixInstructionMarkers(); + assert (DI->second->getLastInsn() && "Invalid last instruction!"); + } + + // Each scope has first instruction and last instruction to mark beginning + // and end of a scope respectively. Create an inverse map that list scopes + // starts (and ends) with an instruction. One instruction may start (or end) + // multiple scopes. + for (DenseMap<MDNode *, DbgScope *>::iterator DI = DbgScopeMap.begin(), + DE = DbgScopeMap.end(); DI != DE; ++DI) { + DbgScope *S = DI->second; + assert (S && "DbgScope is missing!"); + const MachineInstr *MI = S->getFirstInsn(); + assert (MI && "DbgScope does not have first instruction!"); + + InsnToDbgScopeMapTy::iterator IDI = DbgScopeBeginMap.find(MI); + if (IDI != DbgScopeBeginMap.end()) + IDI->second.push_back(S); + else + DbgScopeBeginMap.insert(std::make_pair(MI, + SmallVector<DbgScope *, 2>(2, S))); + + MI = S->getLastInsn(); + assert (MI && "DbgScope does not have last instruction!"); + IDI = DbgScopeEndMap.find(MI); + if (IDI != DbgScopeEndMap.end()) + IDI->second.push_back(S); + else + DbgScopeEndMap.insert(std::make_pair(MI, + SmallVector<DbgScope *, 2>(2, S))); + } + + return !DbgScopeMap.empty(); +} + +static DISubprogram getDISubprogram(MDNode *N) { + + DIDescriptor D(N); + if (D.isNull()) + return DISubprogram(); + + if (D.isCompileUnit()) + return DISubprogram(); + + if (D.isSubprogram()) + return DISubprogram(N); + + if (D.isLexicalBlock()) + return getDISubprogram(DILexicalBlock(N).getContext().getNode()); + + llvm_unreachable("Unexpected Descriptor!"); +} + /// BeginFunction - Gather pre-function debug information. Assumes being /// emitted immediately after the function entry point. 
void DwarfDebug::BeginFunction(MachineFunction *MF) { @@ -1420,6 +1923,12 @@ void DwarfDebug::BeginFunction(MachineFunction *MF) { if (TimePassesIsEnabled) DebugTimer->startTimer(); +#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN + if (!ExtractScopeInformation(MF)) + return; + CollectVariableInfo(); +#endif + // Begin accumulating function debug information. MMI->BeginFunction(MF); @@ -1428,14 +1937,28 @@ void DwarfDebug::BeginFunction(MachineFunction *MF) { // Emit label for the implicitly defined dbg.stoppoint at the start of the // function. +#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN DebugLoc FDL = MF->getDefaultDebugLoc(); if (!FDL.isUnknown()) { DebugLocTuple DLT = MF->getDebugLocTuple(FDL); - unsigned LabelID = RecordSourceLine(DLT.Line, DLT.Col, - DICompileUnit(DLT.CompileUnit)); + unsigned LabelID = 0; + DISubprogram SP = getDISubprogram(DLT.Scope); + if (!SP.isNull()) + LabelID = RecordSourceLine(SP.getLineNumber(), 0, DLT.Scope); + else + LabelID = RecordSourceLine(DLT.Line, DLT.Col, DLT.Scope); Asm->printLabel(LabelID); + O << '\n'; } - +#else + DebugLoc FDL = MF->getDefaultDebugLoc(); + if (!FDL.isUnknown()) { + DebugLocTuple DLT = MF->getDebugLocTuple(FDL); + unsigned LabelID = RecordSourceLine(DLT.Line, DLT.Col, DLT.Scope); + Asm->printLabel(LabelID); + O << '\n'; + } +#endif if (TimePassesIsEnabled) DebugTimer->stopTimer(); } @@ -1448,13 +1971,17 @@ void DwarfDebug::EndFunction(MachineFunction *MF) { if (TimePassesIsEnabled) DebugTimer->startTimer(); +#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN + if (DbgScopeMap.empty()) + return; +#endif // Define end label for subprogram. EmitLabel("func_end", SubprogramCount); // Get function line info. if (!Lines.empty()) { // Get section line info. 
- unsigned ID = SectionMap.insert(Asm->CurrentSection_); + unsigned ID = SectionMap.insert(Asm->getCurrentSection()); if (SectionSourceLines.size() < ID) SectionSourceLines.resize(ID); std::vector<SrcLineInfo> &SectionLineInfos = SectionSourceLines[ID-1]; // Append the function info to section info. @@ -1489,9 +2016,10 @@ void DwarfDebug::EndFunction(MachineFunction *MF) { if (FunctionDbgScope) { delete FunctionDbgScope; DbgScopeMap.clear(); + DbgScopeBeginMap.clear(); + DbgScopeEndMap.clear(); DbgAbstractScopeMap.clear(); DbgConcreteScopeMap.clear(); - InlinedVariableScopes.clear(); FunctionDbgScope = NULL; LexicalScopeStack.clear(); AbstractInstanceRootList.clear(); @@ -1507,32 +2035,34 @@ void DwarfDebug::EndFunction(MachineFunction *MF) { /// RecordSourceLine - Records location information and associates it with a /// label. Returns a unique label ID used to generate a label and provide /// correspondence to the source line list. -unsigned DwarfDebug::RecordSourceLine(Value *V, unsigned Line, unsigned Col) { - if (TimePassesIsEnabled) - DebugTimer->startTimer(); - - CompileUnit *Unit = CompileUnitMap[V]; - assert(Unit && "Unable to find CompileUnit"); - unsigned ID = MMI->NextLabelID(); - Lines.push_back(SrcLineInfo(Line, Col, Unit->getID(), ID)); +unsigned DwarfDebug::RecordSourceLine(unsigned Line, unsigned Col, + MDNode *S) { + if (!MMI) + return 0; if (TimePassesIsEnabled) - DebugTimer->stopTimer(); - - return ID; -} - -/// RecordSourceLine - Records location information and associates it with a -/// label. Returns a unique label ID used to generate a label and provide -/// correspondence to the source line list. 
-unsigned DwarfDebug::RecordSourceLine(unsigned Line, unsigned Col, - DICompileUnit CU) { - if (TimePassesIsEnabled) DebugTimer->startTimer(); - std::string Dir, Fn; - unsigned Src = GetOrCreateSourceID(CU.getDirectory(Dir), - CU.getFilename(Fn)); + const char *Dir = NULL; + const char *Fn = NULL; + + DIDescriptor Scope(S); + if (Scope.isCompileUnit()) { + DICompileUnit CU(S); + Dir = CU.getDirectory(); + Fn = CU.getFilename(); + } else if (Scope.isSubprogram()) { + DISubprogram SP(S); + Dir = SP.getDirectory(); + Fn = SP.getFilename(); + } else if (Scope.isLexicalBlock()) { + DILexicalBlock DB(S); + Dir = DB.getDirectory(); + Fn = DB.getFilename(); + } else + assert (0 && "Unexpected scope info"); + + unsigned Src = GetOrCreateSourceID(Dir, Fn); unsigned ID = MMI->NextLabelID(); Lines.push_back(SrcLineInfo(Line, Col, Src, ID)); @@ -1552,7 +2082,7 @@ unsigned DwarfDebug::getOrCreateSourceID(const std::string &DirName, if (TimePassesIsEnabled) DebugTimer->startTimer(); - unsigned SrcId = GetOrCreateSourceID(DirName, FileName); + unsigned SrcId = GetOrCreateSourceID(DirName.c_str(), FileName.c_str()); if (TimePassesIsEnabled) DebugTimer->stopTimer(); @@ -1561,11 +2091,11 @@ unsigned DwarfDebug::getOrCreateSourceID(const std::string &DirName, } /// RecordRegionStart - Indicate the start of a region. -unsigned DwarfDebug::RecordRegionStart(GlobalVariable *V) { +unsigned DwarfDebug::RecordRegionStart(MDNode *N) { if (TimePassesIsEnabled) DebugTimer->startTimer(); - DbgScope *Scope = getOrCreateScope(V); + DbgScope *Scope = getOrCreateScope(N); unsigned ID = MMI->NextLabelID(); if (!Scope->getStartLabelID()) Scope->setStartLabelID(ID); LexicalScopeStack.push_back(Scope); @@ -1577,11 +2107,11 @@ unsigned DwarfDebug::RecordRegionStart(GlobalVariable *V) { } /// RecordRegionEnd - Indicate the end of a region. 
-unsigned DwarfDebug::RecordRegionEnd(GlobalVariable *V) { +unsigned DwarfDebug::RecordRegionEnd(MDNode *N) { if (TimePassesIsEnabled) DebugTimer->startTimer(); - DbgScope *Scope = getOrCreateScope(V); + DbgScope *Scope = getOrCreateScope(N); unsigned ID = MMI->NextLabelID(); Scope->setEndLabelID(ID); // FIXME : region.end() may not be in the last basic block. @@ -1598,62 +2128,36 @@ unsigned DwarfDebug::RecordRegionEnd(GlobalVariable *V) { } /// RecordVariable - Indicate the declaration of a local variable. -void DwarfDebug::RecordVariable(GlobalVariable *GV, unsigned FrameIndex, - const MachineInstr *MI) { +void DwarfDebug::RecordVariable(MDNode *N, unsigned FrameIndex) { if (TimePassesIsEnabled) DebugTimer->startTimer(); - DIDescriptor Desc(GV); + DIDescriptor Desc(N); DbgScope *Scope = NULL; bool InlinedFnVar = false; - if (Desc.getTag() == dwarf::DW_TAG_variable) { - // GV is a global variable. - DIGlobalVariable DG(GV); - Scope = getOrCreateScope(DG.getContext().getGV()); - } else { - DenseMap<const MachineInstr *, DbgScope *>::iterator - SI = InlinedVariableScopes.find(MI); - - if (SI != InlinedVariableScopes.end()) { - // or GV is an inlined local variable. - Scope = SI->second; - } else { - DIVariable DV(GV); - GlobalVariable *V = DV.getContext().getGV(); - - // FIXME: The code that checks for the inlined local variable is a hack! - DenseMap<const GlobalVariable *, DbgScope *>::iterator - AI = AbstractInstanceRootMap.find(V); - - if (AI != AbstractInstanceRootMap.end()) { - // This method is called each time a DECLARE node is encountered. For an - // inlined function, this could be many, many times. We don't want to - // re-add variables to that DIE for each time. We just want to add them - // once. Check to make sure that we haven't added them already. 
- DenseMap<const GlobalVariable *, - SmallSet<const GlobalVariable *, 32> >::iterator - IP = InlinedParamMap.find(V); - - if (IP != InlinedParamMap.end() && IP->second.count(GV) > 0) { - if (TimePassesIsEnabled) - DebugTimer->stopTimer(); - return; - } - - // or GV is an inlined local variable. - Scope = AI->second; - InlinedParamMap[V].insert(GV); - InlinedFnVar = true; - } else { - // or GV is a local variable. - Scope = getOrCreateScope(V); + if (Desc.getTag() == dwarf::DW_TAG_variable) + Scope = getOrCreateScope(DIGlobalVariable(N).getContext().getNode()); + else { + bool InlinedVar = false; + MDNode *Context = DIVariable(N).getContext().getNode(); + DISubprogram SP(Context); + if (!SP.isNull()) { + // SP is inserted into DbgAbstractScopeMap when inlined function + // start was recorded by RecordInlineFnStart. + DenseMap<MDNode *, DbgScope *>::iterator + I = DbgAbstractScopeMap.find(SP.getNode()); + if (I != DbgAbstractScopeMap.end()) { + InlinedVar = true; + Scope = I->second; } } + if (!InlinedVar) + Scope = getOrCreateScope(Context); } assert(Scope && "Unable to find the variable's scope"); - DbgVariable *DV = new DbgVariable(DIVariable(GV), FrameIndex, InlinedFnVar); + DbgVariable *DV = new DbgVariable(DIVariable(N), FrameIndex, InlinedFnVar); Scope->AddVariable(DV); if (TimePassesIsEnabled) @@ -1665,23 +2169,23 @@ unsigned DwarfDebug::RecordInlinedFnStart(DISubprogram &SP, DICompileUnit CU, unsigned Line, unsigned Col) { unsigned LabelID = MMI->NextLabelID(); - if (!TAI->doesDwarfUsesInlineInfoSection()) + if (!MAI->doesDwarfUsesInlineInfoSection()) return LabelID; if (TimePassesIsEnabled) DebugTimer->startTimer(); - GlobalVariable *GV = SP.getGV(); - DenseMap<const GlobalVariable *, DbgScope *>::iterator - II = AbstractInstanceRootMap.find(GV); + MDNode *Node = SP.getNode(); + DenseMap<const MDNode *, DbgScope *>::iterator + II = AbstractInstanceRootMap.find(Node); if (II == AbstractInstanceRootMap.end()) { // Create an abstract instance entry for this 
inlined function if it doesn't // already exist. - DbgScope *Scope = new DbgScope(NULL, DIDescriptor(GV)); + DbgScope *Scope = new DbgScope(NULL, DIDescriptor(Node)); // Get the compile unit context. - DIE *SPDie = ModuleCU->getDieMapSlotFor(GV); + DIE *SPDie = ModuleCU->getDieMapSlotFor(Node); if (!SPDie) SPDie = CreateSubprogramDIE(ModuleCU, SP, false, true); @@ -1693,18 +2197,18 @@ unsigned DwarfDebug::RecordInlinedFnStart(DISubprogram &SP, DICompileUnit CU, AddUInt(SPDie, dwarf::DW_AT_inline, 0, dwarf::DW_INL_declared_not_inlined); // Keep track of the abstract scope for this function. - DbgAbstractScopeMap[GV] = Scope; + DbgAbstractScopeMap[Node] = Scope; - AbstractInstanceRootMap[GV] = Scope; + AbstractInstanceRootMap[Node] = Scope; AbstractInstanceRootList.push_back(Scope); } // Create a concrete inlined instance for this inlined function. - DbgConcreteScope *ConcreteScope = new DbgConcreteScope(DIDescriptor(GV)); + DbgConcreteScope *ConcreteScope = new DbgConcreteScope(DIDescriptor(Node)); DIE *ScopeDie = new DIE(dwarf::DW_TAG_inlined_subroutine); ScopeDie->setAbstractCompileUnit(ModuleCU); - DIE *Origin = ModuleCU->getDieMapSlotFor(GV); + DIE *Origin = ModuleCU->getDieMapSlotFor(Node); AddDIEEntry(ScopeDie, dwarf::DW_AT_abstract_origin, dwarf::DW_FORM_ref4, Origin); AddUInt(ScopeDie, dwarf::DW_AT_call_file, 0, ModuleCU->getID()); @@ -1718,20 +2222,20 @@ unsigned DwarfDebug::RecordInlinedFnStart(DISubprogram &SP, DICompileUnit CU, LexicalScopeStack.back()->AddConcreteInst(ConcreteScope); // Keep track of the concrete scope that's inlined into this function. 
- DenseMap<GlobalVariable *, SmallVector<DbgScope *, 8> >::iterator - SI = DbgConcreteScopeMap.find(GV); + DenseMap<MDNode *, SmallVector<DbgScope *, 8> >::iterator + SI = DbgConcreteScopeMap.find(Node); if (SI == DbgConcreteScopeMap.end()) - DbgConcreteScopeMap[GV].push_back(ConcreteScope); + DbgConcreteScopeMap[Node].push_back(ConcreteScope); else SI->second.push_back(ConcreteScope); // Track the start label for this inlined function. - DenseMap<GlobalVariable *, SmallVector<unsigned, 4> >::iterator - I = InlineInfo.find(GV); + DenseMap<MDNode *, SmallVector<unsigned, 4> >::iterator + I = InlineInfo.find(Node); if (I == InlineInfo.end()) - InlineInfo[GV].push_back(LabelID); + InlineInfo[Node].push_back(LabelID); else I->second.push_back(LabelID); @@ -1743,15 +2247,15 @@ unsigned DwarfDebug::RecordInlinedFnStart(DISubprogram &SP, DICompileUnit CU, /// RecordInlinedFnEnd - Indicate the end of inlined subroutine. unsigned DwarfDebug::RecordInlinedFnEnd(DISubprogram &SP) { - if (!TAI->doesDwarfUsesInlineInfoSection()) + if (!MAI->doesDwarfUsesInlineInfoSection()) return 0; if (TimePassesIsEnabled) DebugTimer->startTimer(); - GlobalVariable *GV = SP.getGV(); - DenseMap<GlobalVariable *, SmallVector<DbgScope *, 8> >::iterator - I = DbgConcreteScopeMap.find(GV); + MDNode *Node = SP.getNode(); + DenseMap<MDNode *, SmallVector<DbgScope *, 8> >::iterator + I = DbgConcreteScopeMap.find(Node); if (I == DbgConcreteScopeMap.end()) { // FIXME: Can this situation actually happen? And if so, should it? @@ -1781,33 +2285,6 @@ unsigned DwarfDebug::RecordInlinedFnEnd(DISubprogram &SP) { return ID; } -/// RecordVariableScope - Record scope for the variable declared by -/// DeclareMI. DeclareMI must describe TargetInstrInfo::DECLARE. Record scopes -/// for only inlined subroutine variables. Other variables's scopes are -/// determined during RecordVariable(). 
-void DwarfDebug::RecordVariableScope(DIVariable &DV, - const MachineInstr *DeclareMI) { - if (TimePassesIsEnabled) - DebugTimer->startTimer(); - - DISubprogram SP(DV.getContext().getGV()); - - if (SP.isNull()) { - if (TimePassesIsEnabled) - DebugTimer->stopTimer(); - - return; - } - - DenseMap<GlobalVariable *, DbgScope *>::iterator - I = DbgAbstractScopeMap.find(SP.getGV()); - if (I != DbgAbstractScopeMap.end()) - InlinedVariableScopes[DeclareMI] = I->second; - - if (TimePassesIsEnabled) - DebugTimer->stopTimer(); -} - //===----------------------------------------------------------------------===// // Emit Methods //===----------------------------------------------------------------------===// @@ -1832,7 +2309,7 @@ unsigned DwarfDebug::SizeAndOffsetDie(DIE *Die, unsigned Offset, bool Last) { Die->setOffset(Offset); // Start the size with the size of abbreviation code. - Offset += TargetAsmInfo::getULEB128Size(AbbrevNumber); + Offset += MCAsmInfo::getULEB128Size(AbbrevNumber); const SmallVector<DIEValue*, 32> &Values = Die->getValues(); const SmallVector<DIEAbbrevData, 8> &AbbrevData = Abbrev->getData(); @@ -1879,38 +2356,40 @@ void DwarfDebug::EmitInitial() { if (didInitial) return; didInitial = true; + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + // Dwarf sections base addresses. 
- if (TAI->doesDwarfRequireFrameSection()) { - Asm->SwitchToDataSection(TAI->getDwarfFrameSection()); + if (MAI->doesDwarfRequireFrameSection()) { + Asm->OutStreamer.SwitchSection(TLOF.getDwarfFrameSection()); EmitLabel("section_debug_frame", 0); } - Asm->SwitchToDataSection(TAI->getDwarfInfoSection()); + Asm->OutStreamer.SwitchSection(TLOF.getDwarfInfoSection()); EmitLabel("section_info", 0); - Asm->SwitchToDataSection(TAI->getDwarfAbbrevSection()); + Asm->OutStreamer.SwitchSection(TLOF.getDwarfAbbrevSection()); EmitLabel("section_abbrev", 0); - Asm->SwitchToDataSection(TAI->getDwarfARangesSection()); + Asm->OutStreamer.SwitchSection(TLOF.getDwarfARangesSection()); EmitLabel("section_aranges", 0); - if (const char *LineInfoDirective = TAI->getDwarfMacroInfoSection()) { - Asm->SwitchToDataSection(LineInfoDirective); + if (const MCSection *LineInfoDirective = TLOF.getDwarfMacroInfoSection()) { + Asm->OutStreamer.SwitchSection(LineInfoDirective); EmitLabel("section_macinfo", 0); } - Asm->SwitchToDataSection(TAI->getDwarfLineSection()); + Asm->OutStreamer.SwitchSection(TLOF.getDwarfLineSection()); EmitLabel("section_line", 0); - Asm->SwitchToDataSection(TAI->getDwarfLocSection()); + Asm->OutStreamer.SwitchSection(TLOF.getDwarfLocSection()); EmitLabel("section_loc", 0); - Asm->SwitchToDataSection(TAI->getDwarfPubNamesSection()); + Asm->OutStreamer.SwitchSection(TLOF.getDwarfPubNamesSection()); EmitLabel("section_pubnames", 0); - Asm->SwitchToDataSection(TAI->getDwarfStrSection()); + Asm->OutStreamer.SwitchSection(TLOF.getDwarfStrSection()); EmitLabel("section_str", 0); - Asm->SwitchToDataSection(TAI->getDwarfRangesSection()); + Asm->OutStreamer.SwitchSection(TLOF.getDwarfRangesSection()); EmitLabel("section_ranges", 0); - Asm->SwitchToSection(TAI->getTextSection()); + Asm->OutStreamer.SwitchSection(TLOF.getTextSection()); EmitLabel("text_begin", 0); - Asm->SwitchToSection(TAI->getDataSection()); + Asm->OutStreamer.SwitchSection(TLOF.getDataSection()); 
EmitLabel("data_begin", 0); } @@ -2012,7 +2491,8 @@ void DwarfDebug::EmitDebugInfoPerCU(CompileUnit *Unit) { void DwarfDebug::EmitDebugInfo() { // Start debug info section. - Asm->SwitchToDataSection(TAI->getDwarfInfoSection()); + Asm->OutStreamer.SwitchSection( + Asm->getObjFileLowering().getDwarfInfoSection()); EmitDebugInfoPerCU(ModuleCU); } @@ -2023,7 +2503,8 @@ void DwarfDebug::EmitAbbreviations() const { // Check to see if it is worth the effort. if (!Abbreviations.empty()) { // Start the debug abbrev section. - Asm->SwitchToDataSection(TAI->getDwarfAbbrevSection()); + Asm->OutStreamer.SwitchSection( + Asm->getObjFileLowering().getDwarfAbbrevSection()); EmitLabel("abbrev_begin", 0); @@ -2071,7 +2552,7 @@ void DwarfDebug::EmitEndOfLineMatrix(unsigned SectionEnd) { void DwarfDebug::EmitDebugLines() { // If the target is using .loc/.file, the assembler will be emitting the // .debug_line table automatically. - if (TAI->hasDotLocAndDotFile()) + if (MAI->hasDotLocAndDotFile()) return; // Minimum line delta, thus ranging from -10..(255-10). @@ -2080,7 +2561,8 @@ void DwarfDebug::EmitDebugLines() { const int MaxLineDelta = 255 + MinLineDelta; // Start the dwarf line section. - Asm->SwitchToDataSection(TAI->getDwarfLineSection()); + Asm->OutStreamer.SwitchSection( + Asm->getObjFileLowering().getDwarfLineSection()); // Construct the section header. EmitDifference("line_end", 0, "line_begin", 0, true); @@ -2147,13 +2629,12 @@ void DwarfDebug::EmitDebugLines() { // Isolate current sections line info. const std::vector<SrcLineInfo> &LineInfos = SectionSourceLines[j]; - if (Asm->isVerbose()) { - const Section* S = SectionMap[j + 1]; - O << '\t' << TAI->getCommentString() << " Section" + /*if (Asm->isVerbose()) { + const MCSection *S = SectionMap[j + 1]; + O << '\t' << MAI->getCommentString() << " Section" << S->getName() << '\n'; - } else { - Asm->EOL(); - } + }*/ + Asm->EOL(); // Dwarf assumes we start with first line of first source file. 
unsigned Source = 1; @@ -2165,12 +2646,14 @@ void DwarfDebug::EmitDebugLines() { unsigned LabelID = MMI->MappedLabel(LineInfo.getLabelID()); if (!LabelID) continue; + if (LineInfo.getLine() == 0) continue; + if (!Asm->isVerbose()) Asm->EOL(); else { std::pair<unsigned, unsigned> SourceID = getSourceDirectoryAndFileIds(LineInfo.getSourceID()); - O << '\t' << TAI->getCommentString() << ' ' + O << '\t' << MAI->getCommentString() << ' ' << getSourceDirectoryName(SourceID.first) << ' ' << getSourceFileName(SourceID.second) <<" :" << utostr_32(LineInfo.getLine()) << '\n'; @@ -2231,7 +2714,7 @@ void DwarfDebug::EmitDebugLines() { /// EmitCommonDebugFrame - Emit common frame info into a debug frame section. /// void DwarfDebug::EmitCommonDebugFrame() { - if (!TAI->doesDwarfRequireFrameSection()) + if (!MAI->doesDwarfRequireFrameSection()) return; int stackGrowth = @@ -2240,7 +2723,8 @@ void DwarfDebug::EmitCommonDebugFrame() { TD->getPointerSize() : -TD->getPointerSize(); // Start the dwarf frame section. - Asm->SwitchToDataSection(TAI->getDwarfFrameSection()); + Asm->OutStreamer.SwitchSection( + Asm->getObjFileLowering().getDwarfFrameSection()); EmitLabel("debug_frame_common", 0); EmitDifference("debug_frame_common_end", 0, @@ -2276,11 +2760,12 @@ void DwarfDebug::EmitCommonDebugFrame() { /// section. void DwarfDebug::EmitFunctionDebugFrame(const FunctionDebugFrameInfo&DebugFrameInfo){ - if (!TAI->doesDwarfRequireFrameSection()) + if (!MAI->doesDwarfRequireFrameSection()) return; // Start the dwarf frame section. - Asm->SwitchToDataSection(TAI->getDwarfFrameSection()); + Asm->OutStreamer.SwitchSection( + Asm->getObjFileLowering().getDwarfFrameSection()); EmitDifference("debug_frame_end", DebugFrameInfo.Number, "debug_frame_begin", DebugFrameInfo.Number, true); @@ -2344,7 +2829,8 @@ void DwarfDebug::EmitDebugPubNamesPerCU(CompileUnit *Unit) { /// void DwarfDebug::EmitDebugPubNames() { // Start the dwarf pubnames section. 
- Asm->SwitchToDataSection(TAI->getDwarfPubNamesSection()); + Asm->OutStreamer.SwitchSection( + Asm->getObjFileLowering().getDwarfPubNamesSection()); EmitDebugPubNamesPerCU(ModuleCU); } @@ -2355,7 +2841,8 @@ void DwarfDebug::EmitDebugStr() { // Check to see if it is worth the effort. if (!StringPool.empty()) { // Start the dwarf str section. - Asm->SwitchToDataSection(TAI->getDwarfStrSection()); + Asm->OutStreamer.SwitchSection( + Asm->getObjFileLowering().getDwarfStrSection()); // For each of strings in the string pool. for (unsigned StringID = 1, N = StringPool.size(); @@ -2376,7 +2863,8 @@ void DwarfDebug::EmitDebugStr() { /// void DwarfDebug::EmitDebugLoc() { // Start the dwarf loc section. - Asm->SwitchToDataSection(TAI->getDwarfLocSection()); + Asm->OutStreamer.SwitchSection( + Asm->getObjFileLowering().getDwarfLocSection()); Asm->EOL(); } @@ -2384,7 +2872,8 @@ void DwarfDebug::EmitDebugLoc() { /// void DwarfDebug::EmitDebugARanges() { // Start the dwarf aranges section. - Asm->SwitchToDataSection(TAI->getDwarfARangesSection()); + Asm->OutStreamer.SwitchSection( + Asm->getObjFileLowering().getDwarfARangesSection()); // FIXME - Mock up #if 0 @@ -2420,16 +2909,18 @@ void DwarfDebug::EmitDebugARanges() { /// void DwarfDebug::EmitDebugRanges() { // Start the dwarf ranges section. - Asm->SwitchToDataSection(TAI->getDwarfRangesSection()); + Asm->OutStreamer.SwitchSection( + Asm->getObjFileLowering().getDwarfRangesSection()); Asm->EOL(); } /// EmitDebugMacInfo - Emit visible names into a debug macinfo section. /// void DwarfDebug::EmitDebugMacInfo() { - if (const char *LineInfoDirective = TAI->getDwarfMacroInfoSection()) { + if (const MCSection *LineInfo = + Asm->getObjFileLowering().getDwarfMacroInfoSection()) { // Start the dwarf macinfo section. 
- Asm->SwitchToDataSection(LineInfoDirective); + Asm->OutStreamer.SwitchSection(LineInfo); Asm->EOL(); } } @@ -2453,13 +2944,14 @@ void DwarfDebug::EmitDebugMacInfo() { /// __debug_info section, and the low_pc is the starting address for the /// inlining instance. void DwarfDebug::EmitDebugInlineInfo() { - if (!TAI->doesDwarfUsesInlineInfoSection()) + if (!MAI->doesDwarfUsesInlineInfoSection()) return; if (!ModuleCU) return; - Asm->SwitchToDataSection(TAI->getDwarfDebugInlineSection()); + Asm->OutStreamer.SwitchSection( + Asm->getObjFileLowering().getDwarfDebugInlineSection()); Asm->EOL(); EmitDifference("debug_inlined_end", 1, "debug_inlined_begin", 1, true); @@ -2470,18 +2962,25 @@ void DwarfDebug::EmitDebugInlineInfo() { Asm->EmitInt16(dwarf::DWARF_VERSION); Asm->EOL("Dwarf Version"); Asm->EmitInt8(TD->getPointerSize()); Asm->EOL("Address Size (in bytes)"); - for (DenseMap<GlobalVariable *, SmallVector<unsigned, 4> >::iterator + for (DenseMap<MDNode *, SmallVector<unsigned, 4> >::iterator I = InlineInfo.begin(), E = InlineInfo.end(); I != E; ++I) { - GlobalVariable *GV = I->first; + MDNode *Node = I->first; SmallVector<unsigned, 4> &Labels = I->second; - DISubprogram SP(GV); - std::string Name; - std::string LName; - - SP.getLinkageName(LName); - SP.getName(Name); + DISubprogram SP(Node); + const char *LName = SP.getLinkageName(); + const char *Name = SP.getName(); - Asm->EmitString(LName.empty() ? Name : LName); + if (!LName) + Asm->EmitString(Name); + else { + // Skip special LLVM prefix that is used to inform the asm printer to not + // emit usual symbol prefix before the symbol name. This happens for + // Objective-C symbol names and symbol whose name is replaced using GCC's + // __asm__ attribute. 
+ if (LName[0] == 1) + LName = &LName[1]; + Asm->EmitString(LName); + } Asm->EOL("MIPS linkage name"); Asm->EmitString(Name); Asm->EOL("Function name"); @@ -2490,13 +2989,13 @@ void DwarfDebug::EmitDebugInlineInfo() { for (SmallVector<unsigned, 4>::iterator LI = Labels.begin(), LE = Labels.end(); LI != LE; ++LI) { - DIE *SP = ModuleCU->getDieMapSlotFor(GV); + DIE *SP = ModuleCU->getDieMapSlotFor(Node); Asm->EmitInt32(SP->getOffset()); Asm->EOL("DIE offset"); if (TD->getPointerSize() == sizeof(int32_t)) - O << TAI->getData32bitsDirective(); + O << MAI->getData32bitsDirective(); else - O << TAI->getData64bitsDirective(); + O << MAI->getData64bitsDirective(); PrintLabelName("label", *LI); Asm->EOL("low_pc"); } diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h index 101dc70..bd377c5 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -35,7 +35,7 @@ class DbgScope; class DbgConcreteScope; class MachineFrameInfo; class MachineModuleInfo; -class TargetAsmInfo; +class MCAsmInfo; class Timer; //===----------------------------------------------------------------------===// @@ -120,7 +120,7 @@ class VISIBILITY_HIDDEN DwarfDebug : public Dwarf { /// SectionMap - Provides a unique id per text section. /// - UniqueVector<const Section*> SectionMap; + UniqueVector<const MCSection*> SectionMap; /// SectionSourceLines - Tracks line numbers per text section. /// @@ -139,34 +139,38 @@ class VISIBILITY_HIDDEN DwarfDebug : public Dwarf { DbgScope *FunctionDbgScope; /// DbgScopeMap - Tracks the scopes in the current function. - DenseMap<GlobalVariable *, DbgScope *> DbgScopeMap; + DenseMap<MDNode *, DbgScope *> DbgScopeMap; + + /// ScopedGVs - Tracks global variables that are not at file scope. 
+ /// For example void f() { static int b = 42; } + SmallVector<WeakVH, 4> ScopedGVs; + + typedef DenseMap<const MachineInstr *, SmallVector<DbgScope *, 2> > + InsnToDbgScopeMapTy; + + /// DbgScopeBeginMap - Maps instruction with a list DbgScopes it starts. + InsnToDbgScopeMapTy DbgScopeBeginMap; + + /// DbgScopeEndMap - Maps instruction with a list DbgScopes it ends. + InsnToDbgScopeMapTy DbgScopeEndMap; /// DbgAbstractScopeMap - Tracks abstract instance scopes in the current /// function. - DenseMap<GlobalVariable *, DbgScope *> DbgAbstractScopeMap; + DenseMap<MDNode *, DbgScope *> DbgAbstractScopeMap; /// DbgConcreteScopeMap - Tracks concrete instance scopes in the current /// function. - DenseMap<GlobalVariable *, + DenseMap<MDNode *, SmallVector<DbgScope *, 8> > DbgConcreteScopeMap; /// InlineInfo - Keep track of inlined functions and their location. This /// information is used to populate debug_inlined section. - DenseMap<GlobalVariable *, SmallVector<unsigned, 4> > InlineInfo; - - /// InlinedVariableScopes - Scopes information for the inlined subroutine - /// variables. - DenseMap<const MachineInstr *, DbgScope *> InlinedVariableScopes; + DenseMap<MDNode *, SmallVector<unsigned, 4> > InlineInfo; /// AbstractInstanceRootMap - Map of abstract instance roots of inlined /// functions. These are subroutine entries that contain a DW_AT_inline /// attribute. - DenseMap<const GlobalVariable *, DbgScope *> AbstractInstanceRootMap; - - /// InlinedParamMap - A map keeping track of which parameters are assigned to - /// which abstract instance. - DenseMap<const GlobalVariable *, - SmallSet<const GlobalVariable *, 32> > InlinedParamMap; + DenseMap<const MDNode *, DbgScope *> AbstractInstanceRootMap; /// AbstractInstanceRootList - List of abstract instance roots of inlined /// functions. 
These are subroutine entries that contain a DW_AT_inline @@ -284,11 +288,8 @@ class VISIBILITY_HIDDEN DwarfDebug : public Dwarf { /// AddSourceLine - Add location information to specified debug information /// entry. void AddSourceLine(DIE *Die, const DIVariable *V); - - /// AddSourceLine - Add location information to specified debug information - /// entry. void AddSourceLine(DIE *Die, const DIGlobal *G); - + void AddSourceLine(DIE *Die, const DISubprogram *SP); void AddSourceLine(DIE *Die, const DIType *Ty); /// AddAddress - Add an address attribute to a die based on the location @@ -296,6 +297,24 @@ class VISIBILITY_HIDDEN DwarfDebug : public Dwarf { void AddAddress(DIE *Die, unsigned Attribute, const MachineLocation &Location); + /// AddComplexAddress - Start with the address based on the location provided, + /// and generate the DWARF information necessary to find the actual variable + /// (navigating the extra location information encoded in the type) based on + /// the starting location. Add the DWARF information to the die. + /// + void AddComplexAddress(DbgVariable *&DV, DIE *Die, unsigned Attribute, + const MachineLocation &Location); + + // FIXME: Should be reformulated in terms of AddComplexAddress. + /// AddBlockByrefAddress - Start with the address based on the location + /// provided, and generate the DWARF information necessary to find the + /// actual Block variable (navigating the Block struct) based on the + /// starting location. Add the DWARF information to the die. Obsolete, + /// please use AddComplexAddress instead. + /// + void AddBlockByrefAddress(DbgVariable *&DV, DIE *Die, unsigned Attribute, + const MachineLocation &Location); + /// AddType - Add a new type attribute to the specified entity. 
void AddType(CompileUnit *DW_Unit, DIE *Entity, DIType Ty); @@ -342,9 +361,10 @@ class VISIBILITY_HIDDEN DwarfDebug : public Dwarf { /// DIE *CreateDbgScopeVariable(DbgVariable *DV, CompileUnit *Unit); - /// getOrCreateScope - Returns the scope associated with the given descriptor. + /// getDbgScope - Returns the scope associated with the given descriptor. /// - DbgScope *getOrCreateScope(GlobalVariable *V); + DbgScope *getOrCreateScope(MDNode *N); + DbgScope *getDbgScope(MDNode *N, const MachineInstr *MI); /// ConstructDbgScope - Construct the components of a scope. /// @@ -454,20 +474,26 @@ class VISIBILITY_HIDDEN DwarfDebug : public Dwarf { /// source file names. If none currently exists, create a new id and insert it /// in the SourceIds map. This can update DirectoryNames and SourceFileNames maps /// as well. - unsigned GetOrCreateSourceID(const std::string &DirName, - const std::string &FileName); + unsigned GetOrCreateSourceID(const char *DirName, + const char *FileName); - void ConstructCompileUnit(GlobalVariable *GV); + void ConstructCompileUnit(MDNode *N); - void ConstructGlobalVariableDIE(GlobalVariable *GV); + void ConstructGlobalVariableDIE(MDNode *N); - void ConstructSubprogram(GlobalVariable *GV); + void ConstructSubprogram(MDNode *N); + + // FIXME: This should go away in favor of complex addresses. + /// Find the type the programmer originally declared the variable to be + /// and return that type. Obsolete, use GetComplexAddrType instead. + /// + DIType GetBlockByrefType(DIType Ty, std::string Name); public: //===--------------------------------------------------------------------===// // Main entry points. 
// - DwarfDebug(raw_ostream &OS, AsmPrinter *A, const TargetAsmInfo *T); + DwarfDebug(raw_ostream &OS, AsmPrinter *A, const MCAsmInfo *T); virtual ~DwarfDebug(); /// ShouldEmitDwarfDebug - Returns true if Dwarf debugging declarations should @@ -493,12 +519,7 @@ public: /// RecordSourceLine - Records location information and associates it with a /// label. Returns a unique label ID used to generate a label and provide /// correspondence to the source line list. - unsigned RecordSourceLine(Value *V, unsigned Line, unsigned Col); - - /// RecordSourceLine - Records location information and associates it with a - /// label. Returns a unique label ID used to generate a label and provide - /// correspondence to the source line list. - unsigned RecordSourceLine(unsigned Line, unsigned Col, DICompileUnit CU); + unsigned RecordSourceLine(unsigned Line, unsigned Col, MDNode *Scope); /// getRecordSourceLineCount - Return the number of source lines in the debug /// info. @@ -515,14 +536,13 @@ public: const std::string &FileName); /// RecordRegionStart - Indicate the start of a region. - unsigned RecordRegionStart(GlobalVariable *V); + unsigned RecordRegionStart(MDNode *N); /// RecordRegionEnd - Indicate the end of a region. - unsigned RecordRegionEnd(GlobalVariable *V); + unsigned RecordRegionEnd(MDNode *N); /// RecordVariable - Indicate the declaration of a local variable. - void RecordVariable(GlobalVariable *GV, unsigned FrameIndex, - const MachineInstr *MI); + void RecordVariable(MDNode *N, unsigned FrameIndex); //// RecordInlinedFnStart - Indicate the start of inlined subroutine. unsigned RecordInlinedFnStart(DISubprogram &SP, DICompileUnit CU, @@ -531,11 +551,20 @@ public: /// RecordInlinedFnEnd - Indicate the end of inlined subroutine. unsigned RecordInlinedFnEnd(DISubprogram &SP); - /// RecordVariableScope - Record scope for the variable declared by - /// DeclareMI. DeclareMI must describe TargetInstrInfo::DECLARE. 
Record scopes - /// for only inlined subroutine variables. Other variables's scopes are - /// determined during RecordVariable(). - void RecordVariableScope(DIVariable &DV, const MachineInstr *DeclareMI); + /// ExtractScopeInformation - Scan machine instructions in this function + /// and collect DbgScopes. Return true, if atleast one scope was found. + bool ExtractScopeInformation(MachineFunction *MF); + + /// CollectVariableInfo - Populate DbgScope entries with variables' info. + void CollectVariableInfo(); + + /// SetDbgScopeBeginLabels - Update DbgScope begin labels for the scopes that + /// start with this machine instruction. + void SetDbgScopeBeginLabels(const MachineInstr *MI, unsigned Label); + + /// SetDbgScopeEndLabels - Update DbgScope end labels for the scopes that + /// end with this machine instruction. + void SetDbgScopeEndLabels(const MachineInstr *MI, unsigned Label); }; } // End of namespace llvm diff --git a/lib/CodeGen/AsmPrinter/DwarfException.cpp b/lib/CodeGen/AsmPrinter/DwarfException.cpp index 37466ab..626523b 100644 --- a/lib/CodeGen/AsmPrinter/DwarfException.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfException.cpp @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// This file contains support for writing dwarf exception info into asm files. +// This file contains support for writing DWARF exception info into asm files. 
// //===----------------------------------------------------------------------===// @@ -15,30 +15,38 @@ #include "llvm/Module.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineLocation.h" -#include "llvm/Support/Dwarf.h" -#include "llvm/Support/Timer.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetAsmInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCSection.h" +#include "llvm/MC/MCStreamer.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/Mangler.h" +#include "llvm/Support/Timer.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" using namespace llvm; static TimerGroup &getDwarfTimerGroup() { - static TimerGroup DwarfTimerGroup("Dwarf Exception"); + static TimerGroup DwarfTimerGroup("DWARF Exception"); return DwarfTimerGroup; } DwarfException::DwarfException(raw_ostream &OS, AsmPrinter *A, - const TargetAsmInfo *T) + const MCAsmInfo *T) : Dwarf(OS, A, T, "eh"), shouldEmitTable(false), shouldEmitMoves(false), shouldEmitTableModule(false), shouldEmitMovesModule(false), ExceptionTimer(0) { - if (TimePassesIsEnabled) - ExceptionTimer = new Timer("Dwarf Exception Writer", + if (TimePassesIsEnabled) + ExceptionTimer = new Timer("DWARF Exception Writer", getDwarfTimerGroup()); } @@ -46,21 +54,45 @@ DwarfException::~DwarfException() { delete ExceptionTimer; } -void DwarfException::EmitCommonEHFrame(const Function *Personality, - unsigned Index) { +/// SizeOfEncodedValue - Return the size of the encoding in bytes. 
+unsigned DwarfException::SizeOfEncodedValue(unsigned Encoding) { + if (Encoding == dwarf::DW_EH_PE_omit) + return 0; + + switch (Encoding & 0x07) { + case dwarf::DW_EH_PE_absptr: + return TD->getPointerSize(); + case dwarf::DW_EH_PE_udata2: + return 2; + case dwarf::DW_EH_PE_udata4: + return 4; + case dwarf::DW_EH_PE_udata8: + return 8; + } + + assert(0 && "Invalid encoded value."); + return 0; +} + +/// EmitCIE - Emit a Common Information Entry (CIE). This holds information that +/// is shared among many Frame Description Entries. There is at least one CIE +/// in every non-empty .debug_frame section. +void DwarfException::EmitCIE(const Function *PersonalityFn, unsigned Index) { // Size and sign of stack growth. int stackGrowth = Asm->TM.getFrameInfo()->getStackGrowthDirection() == TargetFrameInfo::StackGrowsUp ? TD->getPointerSize() : -TD->getPointerSize(); + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + // Begin eh frame section. - Asm->SwitchToTextSection(TAI->getDwarfEHFrameSection()); - - if (!TAI->doesRequireNonLocalEHFrameLabel()) - O << TAI->getEHGlobalPrefix(); + Asm->OutStreamer.SwitchSection(TLOF.getEHFrameSection()); + if (MAI->is_EHSymbolPrivate()) + O << MAI->getPrivateGlobalPrefix(); O << "EH_frame" << Index << ":\n"; + EmitLabel("section_eh_frame", Index); // Define base labels. @@ -79,8 +111,53 @@ void DwarfException::EmitCommonEHFrame(const Function *Personality, Asm->EOL("CIE Version"); // The personality presence indicates that language specific information will - // show up in the eh frame. - Asm->EmitString(Personality ? "zPLR" : "zR"); + // show up in the eh frame. Find out how we are supposed to lower the + // personality function reference: + const MCExpr *PersonalityRef = 0; + bool IsPersonalityIndirect = false, IsPersonalityPCRel = false; + if (PersonalityFn) { + // FIXME: HANDLE STATIC CODEGEN MODEL HERE. + + // In non-static mode, ask the object file how to represent this reference. 
+ PersonalityRef = + TLOF.getSymbolForDwarfGlobalReference(PersonalityFn, Asm->Mang, + Asm->MMI, + IsPersonalityIndirect, + IsPersonalityPCRel); + } + + unsigned PerEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; + if (IsPersonalityIndirect) + PerEncoding |= dwarf::DW_EH_PE_indirect; + unsigned LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; + unsigned FDEEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; + + char Augmentation[5] = { 0 }; + unsigned AugmentationSize = 0; + char *APtr = Augmentation + 1; + + if (PersonalityRef) { + // There is a personality function. + *APtr++ = 'P'; + AugmentationSize += 1 + SizeOfEncodedValue(PerEncoding); + } + + if (UsesLSDA[Index]) { + // An LSDA pointer is in the FDE augmentation. + *APtr++ = 'L'; + ++AugmentationSize; + } + + if (FDEEncoding != dwarf::DW_EH_PE_absptr) { + // A non-default pointer encoding for the FDE. + *APtr++ = 'R'; + ++AugmentationSize; + } + + if (APtr != Augmentation + 1) + Augmentation[0] = 'z'; + + Asm->EmitString(Augmentation); Asm->EOL("CIE Augmentation"); // Round out reader. @@ -91,39 +168,41 @@ void DwarfException::EmitCommonEHFrame(const Function *Personality, Asm->EmitInt8(RI->getDwarfRegNum(RI->getRARegister(), true)); Asm->EOL("CIE Return Address Column"); - // If there is a personality, we need to indicate the functions location. - if (Personality) { - Asm->EmitULEB128Bytes(7); - Asm->EOL("Augmentation Size"); - - if (TAI->getNeedsIndirectEncoding()) { - Asm->EmitInt8(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4 | - dwarf::DW_EH_PE_indirect); - Asm->EOL("Personality (pcrel sdata4 indirect)"); - } else { - Asm->EmitInt8(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4); - Asm->EOL("Personality (pcrel sdata4)"); + Asm->EmitULEB128Bytes(AugmentationSize); + Asm->EOL("Augmentation Size"); + + Asm->EmitInt8(PerEncoding); + Asm->EOL("Personality", PerEncoding); + + // If there is a personality, we need to indicate the function's location. 
+ if (PersonalityRef) { + // If the reference to the personality function symbol is not already + // pc-relative, then we need to subtract our current address from it. Do + // this by emitting a label and subtracting it from the expression we + // already have. This is equivalent to emitting "foo - .", but we have to + // emit the label for "." directly. + if (!IsPersonalityPCRel) { + SmallString<64> Name; + raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix() + << "personalityref_addr" << Asm->getFunctionNumber() << "_" << Index; + MCSymbol *DotSym = Asm->OutContext.GetOrCreateSymbol(Name.str()); + Asm->OutStreamer.EmitLabel(DotSym); + + PersonalityRef = + MCBinaryExpr::CreateSub(PersonalityRef, + MCSymbolRefExpr::Create(DotSym,Asm->OutContext), + Asm->OutContext); } - - PrintRelDirective(true); - O << TAI->getPersonalityPrefix(); - Asm->EmitExternalGlobal((const GlobalVariable *)(Personality)); - O << TAI->getPersonalitySuffix(); - if (strcmp(TAI->getPersonalitySuffix(), "+4@GOTPCREL")) - O << "-" << TAI->getPCSymbol(); + + O << MAI->getData32bitsDirective(); + PersonalityRef->print(O, MAI); Asm->EOL("Personality"); - Asm->EmitInt8(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4); - Asm->EOL("LSDA Encoding (pcrel sdata4)"); + Asm->EmitInt8(LSDAEncoding); + Asm->EOL("LSDA Encoding", LSDAEncoding); - Asm->EmitInt8(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4); - Asm->EOL("FDE Encoding (pcrel sdata4)"); - } else { - Asm->EmitULEB128Bytes(1); - Asm->EOL("Augmentation Size"); - - Asm->EmitInt8(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4); - Asm->EOL("FDE Encoding (pcrel sdata4)"); + Asm->EmitInt8(FDEEncoding); + Asm->EOL("FDE Encoding", FDEEncoding); } // Indicate locations of general callee saved registers in frame. @@ -134,55 +213,44 @@ void DwarfException::EmitCommonEHFrame(const Function *Personality, // On Darwin the linker honors the alignment of eh_frame, which means it must // be 8-byte on 64-bit targets to match what gcc does. 
Otherwise you get // holes which confuse readers of eh_frame. - Asm->EmitAlignment(TD->getPointerSize() == sizeof(int32_t) ? 2 : 3, - 0, 0, false); + Asm->EmitAlignment(TD->getPointerSize() == 4 ? 2 : 3, 0, 0, false); EmitLabel("eh_frame_common_end", Index); Asm->EOL(); } -/// EmitEHFrame - Emit function exception frame information. -/// -void DwarfException::EmitEHFrame(const FunctionEHFrameInfo &EHFrameInfo) { - assert(!EHFrameInfo.function->hasAvailableExternallyLinkage() && +/// EmitFDE - Emit the Frame Description Entry (FDE) for the function. +void DwarfException::EmitFDE(const FunctionEHFrameInfo &EHFrameInfo) { + assert(!EHFrameInfo.function->hasAvailableExternallyLinkage() && "Should not emit 'available externally' functions at all"); - Function::LinkageTypes linkage = EHFrameInfo.function->getLinkage(); - Asm->SwitchToTextSection(TAI->getDwarfEHFrameSection()); + const Function *TheFunc = EHFrameInfo.function; + + Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering().getEHFrameSection()); // Externally visible entry into the functions eh frame info. If the // corresponding function is static, this should not be externally visible. - if (linkage != Function::InternalLinkage && - linkage != Function::PrivateLinkage) { - if (const char *GlobalEHDirective = TAI->getGlobalEHDirective()) + if (!TheFunc->hasLocalLinkage()) + if (const char *GlobalEHDirective = MAI->getGlobalEHDirective()) O << GlobalEHDirective << EHFrameInfo.FnName << "\n"; - } // If corresponding function is weak definition, this should be too. - if ((linkage == Function::WeakAnyLinkage || - linkage == Function::WeakODRLinkage || - linkage == Function::LinkOnceAnyLinkage || - linkage == Function::LinkOnceODRLinkage) && - TAI->getWeakDefDirective()) - O << TAI->getWeakDefDirective() << EHFrameInfo.FnName << "\n"; + if (TheFunc->isWeakForLinker() && MAI->getWeakDefDirective()) + O << MAI->getWeakDefDirective() << EHFrameInfo.FnName << "\n"; // If there are no calls then you can't unwind. 
This may mean we can omit the // EH Frame, but some environments do not handle weak absolute symbols. If // UnwindTablesMandatory is set we cannot do this optimization; the unwind // info is to be available for non-EH uses. - if (!EHFrameInfo.hasCalls && - !UnwindTablesMandatory && - ((linkage != Function::WeakAnyLinkage && - linkage != Function::WeakODRLinkage && - linkage != Function::LinkOnceAnyLinkage && - linkage != Function::LinkOnceODRLinkage) || - !TAI->getWeakDefDirective() || - TAI->getSupportsWeakOmittedEHFrame())) { + if (!EHFrameInfo.hasCalls && !UnwindTablesMandatory && + (!TheFunc->isWeakForLinker() || + !MAI->getWeakDefDirective() || + MAI->getSupportsWeakOmittedEHFrame())) { O << EHFrameInfo.FnName << " = 0\n"; // This name has no connection to the function, so it might get // dead-stripped when the function is not, erroneously. Prohibit // dead-stripping unconditionally. - if (const char *UsedDirective = TAI->getUsedDirective()) + if (const char *UsedDirective = MAI->getUsedDirective()) O << UsedDirective << EHFrameInfo.FnName << "\n\n"; } else { O << EHFrameInfo.FnName << ":\n"; @@ -194,17 +262,9 @@ void DwarfException::EmitEHFrame(const FunctionEHFrameInfo &EHFrameInfo) { EmitLabel("eh_frame_begin", EHFrameInfo.Number); - if (TAI->doesRequireNonLocalEHFrameLabel()) { - PrintRelDirective(true, true); - PrintLabelName("eh_frame_begin", EHFrameInfo.Number); - - if (!TAI->isAbsoluteEHSectionOffsets()) - O << "-EH_frame" << EHFrameInfo.PersonalityIndex; - } else { - EmitSectionOffset("eh_frame_begin", "eh_frame_common", - EHFrameInfo.Number, EHFrameInfo.PersonalityIndex, - true, true, false); - } + EmitSectionOffset("eh_frame_begin", "eh_frame_common", + EHFrameInfo.Number, EHFrameInfo.PersonalityIndex, + true, true, false); Asm->EOL("FDE CIE offset"); @@ -216,14 +276,20 @@ void DwarfException::EmitEHFrame(const FunctionEHFrameInfo &EHFrameInfo) { // If there is a personality and landing pads then point to the language // specific data area in the 
exception table. - if (EHFrameInfo.PersonalityIndex) { - Asm->EmitULEB128Bytes(4); + if (MMI->getPersonalities()[0] != NULL) { + bool is4Byte = TD->getPointerSize() == sizeof(int32_t); + + Asm->EmitULEB128Bytes(is4Byte ? 4 : 8); Asm->EOL("Augmentation size"); if (EHFrameInfo.hasLandingPads) - EmitReference("exception", EHFrameInfo.Number, true, true); - else - Asm->EmitInt32((int)0); + EmitReference("exception", EHFrameInfo.Number, true, false); + else { + if (is4Byte) + Asm->EmitInt32((int)0); + else + Asm->EmitInt64((int)0); + } Asm->EOL("Language Specific Data Area"); } else { Asm->EmitULEB128Bytes(0); @@ -231,7 +297,7 @@ void DwarfException::EmitEHFrame(const FunctionEHFrameInfo &EHFrameInfo) { } // Indicate locations of function specific callee saved registers in frame. - EmitFrameMoves("eh_func_begin", EHFrameInfo.Number, EHFrameInfo.Moves, + EmitFrameMoves("eh_func_begin", EHFrameInfo.Number, EHFrameInfo.Moves, true); // On Darwin the linker honors the alignment of eh_frame, which means it @@ -246,32 +312,13 @@ void DwarfException::EmitEHFrame(const FunctionEHFrameInfo &EHFrameInfo) { // retains the function in this case, and there is code around that depends // on unused functions (calling undefined externals) being dead-stripped to // link correctly. Yes, there really is. - if (MMI->getUsedFunctions().count(EHFrameInfo.function)) - if (const char *UsedDirective = TAI->getUsedDirective()) + if (MMI->isUsedFunction(EHFrameInfo.function)) + if (const char *UsedDirective = MAI->getUsedDirective()) O << UsedDirective << EHFrameInfo.FnName << "\n\n"; } -} -/// EmitExceptionTable - Emit landing pads and actions. -/// -/// The general organization of the table is complex, but the basic concepts are -/// easy. First there is a header which describes the location and organization -/// of the three components that follow. -/// -/// 1. The landing pad site information describes the range of code covered by -/// the try. 
In our case it's an accumulation of the ranges covered by the -/// invokes in the try. There is also a reference to the landing pad that -/// handles the exception once processed. Finally an index into the actions -/// table. -/// 2. The action table, in our case, is composed of pairs of type ids and next -/// action offset. Starting with the action index from the landing pad -/// site, each type Id is checked for a match to the current exception. If -/// it matches then the exception and type id are passed on to the landing -/// pad. Otherwise the next action is looked up. This chain is terminated -/// with a next action of zero. If no type id is found the the frame is -/// unwound and handling continues. -/// 3. Type id table contains references to all the C++ typeinfo for all -/// catches in the function. This tables is reversed indexed base 1. + Asm->EOL(); +} /// SharedTypeIds - How many leading type ids two landing pads have in common. unsigned DwarfException::SharedTypeIds(const LandingPadInfo *L, @@ -301,51 +348,58 @@ bool DwarfException::PadLT(const LandingPadInfo *L, const LandingPadInfo *R) { return LSize < RSize; } -void DwarfException::EmitExceptionTable() { - const std::vector<GlobalVariable *> &TypeInfos = MMI->getTypeInfos(); - const std::vector<unsigned> &FilterIds = MMI->getFilterIds(); - const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads(); - if (PadInfos.empty()) return; - - // Sort the landing pads in order of their type ids. This is used to fold - // duplicate actions. - SmallVector<const LandingPadInfo *, 64> LandingPads; - LandingPads.reserve(PadInfos.size()); - for (unsigned i = 0, N = PadInfos.size(); i != N; ++i) - LandingPads.push_back(&PadInfos[i]); - std::sort(LandingPads.begin(), LandingPads.end(), PadLT); - - // Negative type ids index into FilterIds, positive type ids index into - // TypeInfos. The value written for a positive type id is just the type id - // itself. 
For a negative type id, however, the value written is the +/// ComputeActionsTable - Compute the actions table and gather the first action +/// index for each landing pad site. +unsigned DwarfException:: +ComputeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads, + SmallVectorImpl<ActionEntry> &Actions, + SmallVectorImpl<unsigned> &FirstActions) { + + // The action table follows the call-site table in the LSDA. The individual + // records are of two types: + // + // * Catch clause + // * Exception specification + // + // The two record kinds have the same format, with only small differences. + // They are distinguished by the "switch value" field: Catch clauses + // (TypeInfos) have strictly positive switch values, and exception + // specifications (FilterIds) have strictly negative switch values. Value 0 + // indicates a catch-all clause. + // + // Negative type IDs index into FilterIds. Positive type IDs index into + // TypeInfos. The value written for a positive type ID is just the type ID + // itself. For a negative type ID, however, the value written is the // (negative) byte offset of the corresponding FilterIds entry. The byte - // offset is usually equal to the type id, because the FilterIds entries are - // written using a variable width encoding which outputs one byte per entry as - // long as the value written is not too large, but can differ. This kind of - // complication does not occur for positive type ids because type infos are + // offset is usually equal to the type ID (because the FilterIds entries are + // written using a variable width encoding, which outputs one byte per entry + // as long as the value written is not too large) but can differ. This kind + // of complication does not occur for positive type IDs because type infos are // output using a fixed width encoding. FilterOffsets[i] holds the byte // offset corresponding to FilterIds[i]. 
+ + const std::vector<unsigned> &FilterIds = MMI->getFilterIds(); SmallVector<int, 16> FilterOffsets; FilterOffsets.reserve(FilterIds.size()); int Offset = -1; - for(std::vector<unsigned>::const_iterator I = FilterIds.begin(), - E = FilterIds.end(); I != E; ++I) { + + for (std::vector<unsigned>::const_iterator + I = FilterIds.begin(), E = FilterIds.end(); I != E; ++I) { FilterOffsets.push_back(Offset); - Offset -= TargetAsmInfo::getULEB128Size(*I); + Offset -= MCAsmInfo::getULEB128Size(*I); } - // Compute the actions table and gather the first action index for each - // landing pad site. - SmallVector<ActionEntry, 32> Actions; - SmallVector<unsigned, 64> FirstActions; FirstActions.reserve(LandingPads.size()); int FirstAction = 0; unsigned SizeActions = 0; - for (unsigned i = 0, N = LandingPads.size(); i != N; ++i) { - const LandingPadInfo *LP = LandingPads[i]; - const std::vector<int> &TypeIds = LP->TypeIds; - const unsigned NumShared = i ? SharedTypeIds(LP, LandingPads[i-1]) : 0; + const LandingPadInfo *PrevLPI = 0; + + for (SmallVectorImpl<const LandingPadInfo *>::const_iterator + I = LandingPads.begin(), E = LandingPads.end(); I != E; ++I) { + const LandingPadInfo *LPI = *I; + const std::vector<int> &TypeIds = LPI->TypeIds; + const unsigned NumShared = PrevLPI ? 
SharedTypeIds(LPI, PrevLPI) : 0; unsigned SizeSiteActions = 0; if (NumShared < TypeIds.size()) { @@ -353,34 +407,33 @@ void DwarfException::EmitExceptionTable() { ActionEntry *PrevAction = 0; if (NumShared) { - const unsigned SizePrevIds = LandingPads[i-1]->TypeIds.size(); + const unsigned SizePrevIds = PrevLPI->TypeIds.size(); assert(Actions.size()); PrevAction = &Actions.back(); - SizeAction = TargetAsmInfo::getSLEB128Size(PrevAction->NextAction) + - TargetAsmInfo::getSLEB128Size(PrevAction->ValueForTypeID); + SizeAction = MCAsmInfo::getSLEB128Size(PrevAction->NextAction) + + MCAsmInfo::getSLEB128Size(PrevAction->ValueForTypeID); for (unsigned j = NumShared; j != SizePrevIds; ++j) { SizeAction -= - TargetAsmInfo::getSLEB128Size(PrevAction->ValueForTypeID); + MCAsmInfo::getSLEB128Size(PrevAction->ValueForTypeID); SizeAction += -PrevAction->NextAction; PrevAction = PrevAction->Previous; } } // Compute the actions. - for (unsigned I = NumShared, M = TypeIds.size(); I != M; ++I) { - int TypeID = TypeIds[I]; - assert(-1-TypeID < (int)FilterOffsets.size() && "Unknown filter id!"); + for (unsigned J = NumShared, M = TypeIds.size(); J != M; ++J) { + int TypeID = TypeIds[J]; + assert(-1 - TypeID < (int)FilterOffsets.size() && "Unknown filter id!"); int ValueForTypeID = TypeID < 0 ? FilterOffsets[-1 - TypeID] : TypeID; - unsigned SizeTypeID = TargetAsmInfo::getSLEB128Size(ValueForTypeID); + unsigned SizeTypeID = MCAsmInfo::getSLEB128Size(ValueForTypeID); int NextAction = SizeAction ? 
-(SizeAction + SizeTypeID) : 0; - SizeAction = SizeTypeID + TargetAsmInfo::getSLEB128Size(NextAction); + SizeAction = SizeTypeID + MCAsmInfo::getSLEB128Size(NextAction); SizeSiteActions += SizeAction; - ActionEntry Action = {ValueForTypeID, NextAction, PrevAction}; + ActionEntry Action = { ValueForTypeID, NextAction, PrevAction }; Actions.push_back(Action); - PrevAction = &Actions.back(); } @@ -388,35 +441,34 @@ void DwarfException::EmitExceptionTable() { FirstAction = SizeActions + SizeSiteActions - SizeAction + 1; } // else identical - re-use previous FirstAction + // Information used when created the call-site table. The action record + // field of the call site record is the offset of the first associated + // action record, relative to the start of the actions table. This value is + // biased by 1 (1 in dicating the start of the actions table), and 0 + // indicates that there are no actions. FirstActions.push_back(FirstAction); // Compute this sites contribution to size. SizeActions += SizeSiteActions; - } - - // Compute the call-site table. The entry for an invoke has a try-range - // containing the call, a non-zero landing pad and an appropriate action. The - // entry for an ordinary call has a try-range containing the call and zero for - // the landing pad and the action. Calls marked 'nounwind' have no entry and - // must not be contained in the try-range of any entry - they form gaps in the - // table. Entries must be ordered by try-range address. - SmallVector<CallSiteEntry, 64> CallSites; - - RangeMapType PadMap; - // Invokes and nounwind calls have entries in PadMap (due to being bracketed - // by try-range labels when lowered). Ordinary calls do not, so appropriate - // try-ranges for them need be deduced. 
- for (unsigned i = 0, N = LandingPads.size(); i != N; ++i) { - const LandingPadInfo *LandingPad = LandingPads[i]; - for (unsigned j = 0, E = LandingPad->BeginLabels.size(); j != E; ++j) { - unsigned BeginLabel = LandingPad->BeginLabels[j]; - assert(!PadMap.count(BeginLabel) && "Duplicate landing pad labels!"); - PadRange P = { i, j }; - PadMap[BeginLabel] = P; - } + PrevLPI = LPI; } + return SizeActions; +} + +/// ComputeCallSiteTable - Compute the call-site table. The entry for an invoke +/// has a try-range containing the call, a non-zero landing pad, and an +/// appropriate action. The entry for an ordinary call has a try-range +/// containing the call and zero for the landing pad and the action. Calls +/// marked 'nounwind' have no entry and must not be contained in the try-range +/// of any entry - they form gaps in the table. Entries must be ordered by +/// try-range address. +void DwarfException:: +ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites, + const RangeMapType &PadMap, + const SmallVectorImpl<const LandingPadInfo *> &LandingPads, + const SmallVectorImpl<unsigned> &FirstActions) { // The end label of the previous invoke or nounwind try-range. unsigned LastLabel = 0; @@ -424,7 +476,7 @@ void DwarfException::EmitExceptionTable() { // an ordinary call) between the end of the previous try-range and now. bool SawPotentiallyThrowing = false; - // Whether the last callsite entry was for an invoke. + // Whether the last CallSite entry was for an invoke. bool PreviousIsInvoke = false; // Visit all instructions in order of address. @@ -450,17 +502,18 @@ void DwarfException::EmitExceptionTable() { // Nope, it was just some random label. 
continue; - PadRange P = L->second; + const PadRange &P = L->second; const LandingPadInfo *LandingPad = LandingPads[P.PadIndex]; - assert(BeginLabel == LandingPad->BeginLabels[P.RangeIndex] && "Inconsistent landing pad map!"); - // If some instruction between the previous try-range and this one may - // throw, create a call-site entry with no landing pad for the region - // between the try-ranges. - if (SawPotentiallyThrowing) { - CallSiteEntry Site = {LastLabel, BeginLabel, 0, 0}; + // For Dwarf exception handling (SjLj handling doesn't use this). If some + // instruction between the previous try-range and this one may throw, + // create a call-site entry with no landing pad for the region between the + // try-ranges. + if (SawPotentiallyThrowing && + MAI->getExceptionHandlingType() == ExceptionHandling::Dwarf) { + CallSiteEntry Site = { LastLabel, BeginLabel, 0, 0 }; CallSites.push_back(Site); PreviousIsInvoke = false; } @@ -470,12 +523,16 @@ void DwarfException::EmitExceptionTable() { if (LandingPad->LandingPadLabel) { // This try-range is for an invoke. - CallSiteEntry Site = {BeginLabel, LastLabel, - LandingPad->LandingPadLabel, - FirstActions[P.PadIndex]}; - - // Try to merge with the previous call-site. - if (PreviousIsInvoke) { + CallSiteEntry Site = { + BeginLabel, + LastLabel, + LandingPad->LandingPadLabel, + FirstActions[P.PadIndex] + }; + + // Try to merge with the previous call-site. SJLJ doesn't do this + if (PreviousIsInvoke && + MAI->getExceptionHandlingType() == ExceptionHandling::Dwarf) { CallSiteEntry &Prev = CallSites.back(); if (Site.PadLabel == Prev.PadLabel && Site.Action == Prev.Action) { // Extend the range of the previous entry. @@ -497,128 +554,363 @@ void DwarfException::EmitExceptionTable() { // If some instruction between the previous try-range and the end of the // function may throw, create a call-site entry with no landing pad for the // region following the try-range. 
- if (SawPotentiallyThrowing) { - CallSiteEntry Site = {LastLabel, 0, 0, 0}; + if (SawPotentiallyThrowing && + MAI->getExceptionHandlingType() == ExceptionHandling::Dwarf) { + CallSiteEntry Site = { LastLabel, 0, 0, 0 }; CallSites.push_back(Site); } +} + +/// EmitExceptionTable - Emit landing pads and actions. +/// +/// The general organization of the table is complex, but the basic concepts are +/// easy. First there is a header which describes the location and organization +/// of the three components that follow. +/// +/// 1. The landing pad site information describes the range of code covered by +/// the try. In our case it's an accumulation of the ranges covered by the +/// invokes in the try. There is also a reference to the landing pad that +/// handles the exception once processed. Finally an index into the actions +/// table. +/// 2. The action table, in our case, is composed of pairs of type IDs and next +/// action offset. Starting with the action index from the landing pad +/// site, each type ID is checked for a match to the current exception. If +/// it matches then the exception and type id are passed on to the landing +/// pad. Otherwise the next action is looked up. This chain is terminated +/// with a next action of zero. If no type id is found then the frame is +/// unwound and handling continues. +/// 3. Type ID table contains references to all the C++ typeinfo for all +/// catches in the function. This tables is reverse indexed base 1. +void DwarfException::EmitExceptionTable() { + const std::vector<GlobalVariable *> &TypeInfos = MMI->getTypeInfos(); + const std::vector<unsigned> &FilterIds = MMI->getFilterIds(); + const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads(); + if (PadInfos.empty()) return; + + // Sort the landing pads in order of their type ids. This is used to fold + // duplicate actions. 
+ SmallVector<const LandingPadInfo *, 64> LandingPads; + LandingPads.reserve(PadInfos.size()); + + for (unsigned i = 0, N = PadInfos.size(); i != N; ++i) + LandingPads.push_back(&PadInfos[i]); + + std::sort(LandingPads.begin(), LandingPads.end(), PadLT); + + // Compute the actions table and gather the first action index for each + // landing pad site. + SmallVector<ActionEntry, 32> Actions; + SmallVector<unsigned, 64> FirstActions; + unsigned SizeActions = ComputeActionsTable(LandingPads, Actions, + FirstActions); + + // Invokes and nounwind calls have entries in PadMap (due to being bracketed + // by try-range labels when lowered). Ordinary calls do not, so appropriate + // try-ranges for them need be deduced when using DWARF exception handling. + RangeMapType PadMap; + for (unsigned i = 0, N = LandingPads.size(); i != N; ++i) { + const LandingPadInfo *LandingPad = LandingPads[i]; + for (unsigned j = 0, E = LandingPad->BeginLabels.size(); j != E; ++j) { + unsigned BeginLabel = LandingPad->BeginLabels[j]; + assert(!PadMap.count(BeginLabel) && "Duplicate landing pad labels!"); + PadRange P = { i, j }; + PadMap[BeginLabel] = P; + } + } + + // Compute the call-site table. + SmallVector<CallSiteEntry, 64> CallSites; + ComputeCallSiteTable(CallSites, PadMap, LandingPads, FirstActions); // Final tallies. // Call sites. 
- const unsigned SiteStartSize = sizeof(int32_t); // DW_EH_PE_udata4 - const unsigned SiteLengthSize = sizeof(int32_t); // DW_EH_PE_udata4 - const unsigned LandingPadSize = sizeof(int32_t); // DW_EH_PE_udata4 - unsigned SizeSites = CallSites.size() * (SiteStartSize + - SiteLengthSize + - LandingPadSize); - for (unsigned i = 0, e = CallSites.size(); i < e; ++i) - SizeSites += TargetAsmInfo::getULEB128Size(CallSites[i].Action); + const unsigned SiteStartSize = SizeOfEncodedValue(dwarf::DW_EH_PE_udata4); + const unsigned SiteLengthSize = SizeOfEncodedValue(dwarf::DW_EH_PE_udata4); + const unsigned LandingPadSize = SizeOfEncodedValue(dwarf::DW_EH_PE_udata4); + bool IsSJLJ = MAI->getExceptionHandlingType() == ExceptionHandling::SjLj; + bool HaveTTData = IsSJLJ ? (!TypeInfos.empty() || !FilterIds.empty()) : true; + unsigned SizeSites; + + if (IsSJLJ) + SizeSites = 0; + else + SizeSites = CallSites.size() * + (SiteStartSize + SiteLengthSize + LandingPadSize); + + for (unsigned i = 0, e = CallSites.size(); i < e; ++i) { + SizeSites += MCAsmInfo::getULEB128Size(CallSites[i].Action); + if (IsSJLJ) + SizeSites += MCAsmInfo::getULEB128Size(i); + } // Type infos. - const unsigned TypeInfoSize = TD->getPointerSize(); // DW_EH_PE_absptr - unsigned SizeTypes = TypeInfos.size() * TypeInfoSize; - - unsigned TypeOffset = sizeof(int8_t) + // Call site format - TargetAsmInfo::getULEB128Size(SizeSites) + // Call-site table length - SizeSites + SizeActions + SizeTypes; - - unsigned TotalSize = sizeof(int8_t) + // LPStart format - sizeof(int8_t) + // TType format - TargetAsmInfo::getULEB128Size(TypeOffset) + // TType base offset - TypeOffset; + const MCSection *LSDASection = Asm->getObjFileLowering().getLSDASection(); + unsigned TTypeFormat; + unsigned TypeFormatSize; + + if (!HaveTTData) { + // For SjLj exceptions, if there is no TypeInfo, then we just explicitly say + // that we're omitting that bit. 
+ TTypeFormat = dwarf::DW_EH_PE_omit; + TypeFormatSize = SizeOfEncodedValue(dwarf::DW_EH_PE_absptr); + } else { + // Okay, we have actual filters or typeinfos to emit. As such, we need to + // pick a type encoding for them. We're about to emit a list of pointers to + // typeinfo objects at the end of the LSDA. However, unless we're in static + // mode, this reference will require a relocation by the dynamic linker. + // + // Because of this, we have a couple of options: + // + // 1) If we are in -static mode, we can always use an absolute reference + // from the LSDA, because the static linker will resolve it. + // + // 2) Otherwise, if the LSDA section is writable, we can output the direct + // reference to the typeinfo and allow the dynamic linker to relocate + // it. Since it is in a writable section, the dynamic linker won't + // have a problem. + // + // 3) Finally, if we're in PIC mode and the LDSA section isn't writable, + // we need to use some form of indirection. For example, on Darwin, + // we can output a statically-relocatable reference to a dyld stub. The + // offset to the stub is constant, but the contents are in a section + // that is updated by the dynamic linker. This is easy enough, but we + // need to tell the personality function of the unwinder to indirect + // through the dyld stub. + // + // FIXME: When (3) is actually implemented, we'll have to emit the stubs + // somewhere. This predicate should be moved to a shared location that is + // in target-independent code. + // + if (LSDASection->getKind().isWriteable() || + Asm->TM.getRelocationModel() == Reloc::Static) + TTypeFormat = dwarf::DW_EH_PE_absptr; + else + TTypeFormat = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | + dwarf::DW_EH_PE_sdata4; - unsigned SizeAlign = (4 - TotalSize) & 3; + TypeFormatSize = SizeOfEncodedValue(TTypeFormat); + } // Begin the exception table. 
- Asm->SwitchToDataSection(TAI->getDwarfExceptionSection()); + Asm->OutStreamer.SwitchSection(LSDASection); Asm->EmitAlignment(2, 0, 0, false); + O << "GCC_except_table" << SubprogramCount << ":\n"; + // The type infos need to be aligned. GCC does this by inserting padding just + // before the type infos. However, this changes the size of the exception + // table, so you need to take this into account when you output the exception + // table size. However, the size is output using a variable length encoding. + // So by increasing the size by inserting padding, you may increase the number + // of bytes used for writing the size. If it increases, say by one byte, then + // you now need to output one less byte of padding to get the type infos + // aligned. However this decreases the size of the exception table. This + // changes the value you have to output for the exception table size. Due to + // the variable length encoding, the number of bytes used for writing the + // length may decrease. If so, you then have to increase the amount of + // padding. And so on. If you look carefully at the GCC code you will see that + // it indeed does this in a loop, going on and on until the values stabilize. + // We chose another solution: don't output padding inside the table like GCC + // does, instead output it before the table. + unsigned SizeTypes = TypeInfos.size() * TypeFormatSize; + unsigned TyOffset = sizeof(int8_t) + // Call site format + MCAsmInfo::getULEB128Size(SizeSites) + // Call-site table length + SizeSites + SizeActions + SizeTypes; + unsigned TotalSize = sizeof(int8_t) + // LPStart format + sizeof(int8_t) + // TType format + (HaveTTData ? 
+ MCAsmInfo::getULEB128Size(TyOffset) : 0) + // TType base offset + TyOffset; + unsigned SizeAlign = (4 - TotalSize) & 3; + for (unsigned i = 0; i != SizeAlign; ++i) { Asm->EmitInt8(0); Asm->EOL("Padding"); - } + } EmitLabel("exception", SubprogramCount); + if (IsSJLJ) { + SmallString<16> LSDAName; + raw_svector_ostream(LSDAName) << MAI->getPrivateGlobalPrefix() << + "_LSDA_" << Asm->getFunctionNumber(); + O << LSDAName.str() << ":\n"; + } + // Emit the header. Asm->EmitInt8(dwarf::DW_EH_PE_omit); - Asm->EOL("LPStart format (DW_EH_PE_omit)"); - Asm->EmitInt8(dwarf::DW_EH_PE_absptr); - Asm->EOL("TType format (DW_EH_PE_absptr)"); - Asm->EmitULEB128Bytes(TypeOffset); - Asm->EOL("TType base offset"); - Asm->EmitInt8(dwarf::DW_EH_PE_udata4); - Asm->EOL("Call site format (DW_EH_PE_udata4)"); - Asm->EmitULEB128Bytes(SizeSites); - Asm->EOL("Call-site table length"); - - // Emit the landing pad site information. - for (unsigned i = 0; i < CallSites.size(); ++i) { - CallSiteEntry &S = CallSites[i]; - const char *BeginTag; - unsigned BeginNumber; - - if (!S.BeginLabel) { - BeginTag = "eh_func_begin"; - BeginNumber = SubprogramCount; - } else { - BeginTag = "label"; - BeginNumber = S.BeginLabel; - } + Asm->EOL("@LPStart format", dwarf::DW_EH_PE_omit); - EmitSectionOffset(BeginTag, "eh_func_begin", BeginNumber, SubprogramCount, - true, true); - Asm->EOL("Region start"); + Asm->EmitInt8(TTypeFormat); + Asm->EOL("@TType format", TTypeFormat); - if (!S.EndLabel) - EmitDifference("eh_func_end", SubprogramCount, BeginTag, BeginNumber, - true); - else - EmitDifference("label", S.EndLabel, BeginTag, BeginNumber, true); + if (HaveTTData) { + Asm->EmitULEB128Bytes(TyOffset); + Asm->EOL("@TType base offset"); + } - Asm->EOL("Region length"); + // SjLj Exception handling + if (IsSJLJ) { + Asm->EmitInt8(dwarf::DW_EH_PE_udata4); + Asm->EOL("Call site format", dwarf::DW_EH_PE_udata4); + Asm->EmitULEB128Bytes(SizeSites); + Asm->EOL("Call site table length"); + + // Emit the landing pad site 
information. + unsigned idx = 0; + for (SmallVectorImpl<CallSiteEntry>::const_iterator + I = CallSites.begin(), E = CallSites.end(); I != E; ++I, ++idx) { + const CallSiteEntry &S = *I; + + // Offset of the landing pad, counted in 16-byte bundles relative to the + // @LPStart address. + Asm->EmitULEB128Bytes(idx); + Asm->EOL("Landing pad"); + + // Offset of the first associated action record, relative to the start of + // the action table. This value is biased by 1 (1 indicates the start of + // the action table), and 0 indicates that there are no actions. + Asm->EmitULEB128Bytes(S.Action); + Asm->EOL("Action"); + } + } else { + // DWARF Exception handling + assert(MAI->getExceptionHandlingType() == ExceptionHandling::Dwarf); + + // The call-site table is a list of all call sites that may throw an + // exception (including C++ 'throw' statements) in the procedure + // fragment. It immediately follows the LSDA header. Each entry indicates, + // for a given call, the first corresponding action record and corresponding + // landing pad. + // + // The table begins with the number of bytes, stored as an LEB128 + // compressed, unsigned integer. The records immediately follow the record + // count. They are sorted in increasing call-site address. Each record + // indicates: + // + // * The position of the call-site. + // * The position of the landing pad. + // * The first action record for that call site. + // + // A missing entry in the call-site table indicates that a call is not + // supposed to throw. + + // Emit the landing pad call site table. 
+ Asm->EmitInt8(dwarf::DW_EH_PE_udata4); + Asm->EOL("Call site format", dwarf::DW_EH_PE_udata4); + Asm->EmitULEB128Bytes(SizeSites); + Asm->EOL("Call site table size"); + + for (SmallVectorImpl<CallSiteEntry>::const_iterator + I = CallSites.begin(), E = CallSites.end(); I != E; ++I) { + const CallSiteEntry &S = *I; + const char *BeginTag; + unsigned BeginNumber; + + if (!S.BeginLabel) { + BeginTag = "eh_func_begin"; + BeginNumber = SubprogramCount; + } else { + BeginTag = "label"; + BeginNumber = S.BeginLabel; + } - if (!S.PadLabel) - Asm->EmitInt32(0); - else - EmitSectionOffset("label", "eh_func_begin", S.PadLabel, SubprogramCount, + // Offset of the call site relative to the previous call site, counted in + // number of 16-byte bundles. The first call site is counted relative to + // the start of the procedure fragment. + EmitSectionOffset(BeginTag, "eh_func_begin", BeginNumber, SubprogramCount, true, true); + Asm->EOL("Region start"); + + if (!S.EndLabel) + EmitDifference("eh_func_end", SubprogramCount, BeginTag, BeginNumber, + true); + else + EmitDifference("label", S.EndLabel, BeginTag, BeginNumber, true); + + Asm->EOL("Region length"); - Asm->EOL("Landing pad"); + // Offset of the landing pad, counted in 16-byte bundles relative to the + // @LPStart address. + if (!S.PadLabel) + Asm->EmitInt32(0); + else + EmitSectionOffset("label", "eh_func_begin", S.PadLabel, SubprogramCount, + true, true); + + Asm->EOL("Landing pad"); - Asm->EmitULEB128Bytes(S.Action); - Asm->EOL("Action"); + // Offset of the first associated action record, relative to the start of + // the action table. This value is biased by 1 (1 indicates the start of + // the action table), and 0 indicates that there are no actions. + Asm->EmitULEB128Bytes(S.Action); + Asm->EOL("Action"); + } } - // Emit the actions. - for (unsigned I = 0, N = Actions.size(); I != N; ++I) { - ActionEntry &Action = Actions[I]; + // Emit the Action Table. 
+ for (SmallVectorImpl<ActionEntry>::const_iterator + I = Actions.begin(), E = Actions.end(); I != E; ++I) { + const ActionEntry &Action = *I; + + // Type Filter + // + // Used by the runtime to match the type of the thrown exception to the + // type of the catch clauses or the types in the exception specification. Asm->EmitSLEB128Bytes(Action.ValueForTypeID); Asm->EOL("TypeInfo index"); + + // Action Record + // + // Self-relative signed displacement in bytes of the next action record, + // or 0 if there is no next action record. + Asm->EmitSLEB128Bytes(Action.NextAction); Asm->EOL("Next action"); } - // Emit the type ids. - for (unsigned M = TypeInfos.size(); M; --M) { - GlobalVariable *GV = TypeInfos[M - 1]; + // Emit the Catch Clauses. The code for the catch clauses following the same + // try is similar to a switch statement. The catch clause action record + // informs the runtime about the type of a catch clause and about the + // associated switch value. + // + // Action Record Fields: + // + // * Filter Value + // Positive value, starting at 1. Index in the types table of the + // __typeinfo for the catch-clause type. 1 is the first word preceding + // TTBase, 2 is the second word, and so on. Used by the runtime to check + // if the thrown exception type matches the catch-clause type. Back-end + // generated switch statements check against this value. + // + // * Next + // Signed offset, in bytes from the start of this field, to the next + // chained action record, or zero if none. + // + // The order of the action records determined by the next field is the order + // of the catch clauses as they appear in the source code, and must be kept in + // the same order. As a result, changing the order of the catch clause would + // change the semantics of the program. 
+ for (std::vector<GlobalVariable *>::const_reverse_iterator + I = TypeInfos.rbegin(), E = TypeInfos.rend(); I != E; ++I) { + const GlobalVariable *GV = *I; PrintRelDirective(); if (GV) { - std::string GLN; - O << Asm->getGlobalLinkName(GV, GLN); + O << Asm->Mang->getMangledName(GV); } else { - O << "0"; + O << "0x0"; } Asm->EOL("TypeInfo"); } - // Emit the filter typeids. - for (unsigned j = 0, M = FilterIds.size(); j < M; ++j) { - unsigned TypeID = FilterIds[j]; + // Emit the Type Table. + for (std::vector<unsigned>::const_iterator + I = FilterIds.begin(), E = FilterIds.end(); I < E; ++I) { + unsigned TypeID = *I; Asm->EmitULEB128Bytes(TypeID); Asm->EOL("Filter TypeInfo index"); } @@ -629,48 +921,53 @@ void DwarfException::EmitExceptionTable() { /// EndModule - Emit all exception information that should come after the /// content. void DwarfException::EndModule() { + if (MAI->getExceptionHandlingType() != ExceptionHandling::Dwarf) + return; + + if (!shouldEmitMovesModule && !shouldEmitTableModule) + return; + if (TimePassesIsEnabled) ExceptionTimer->startTimer(); - if (shouldEmitMovesModule || shouldEmitTableModule) { - const std::vector<Function *> Personalities = MMI->getPersonalities(); - for (unsigned i = 0; i < Personalities.size(); ++i) - EmitCommonEHFrame(Personalities[i], i); + const std::vector<Function *> Personalities = MMI->getPersonalities(); - for (std::vector<FunctionEHFrameInfo>::iterator I = EHFrames.begin(), - E = EHFrames.end(); I != E; ++I) - EmitEHFrame(*I); - } + for (unsigned I = 0, E = Personalities.size(); I < E; ++I) + EmitCIE(Personalities[I], I); + + for (std::vector<FunctionEHFrameInfo>::iterator + I = EHFrames.begin(), E = EHFrames.end(); I != E; ++I) + EmitFDE(*I); if (TimePassesIsEnabled) ExceptionTimer->stopTimer(); } -/// BeginFunction - Gather pre-function exception information. Assumes being -/// emitted immediately after the function entry point. +/// BeginFunction - Gather pre-function exception information. 
Assumes it's +/// being emitted immediately after the function entry point. void DwarfException::BeginFunction(MachineFunction *MF) { + if (!MMI || !MAI->doesSupportExceptionHandling()) return; + if (TimePassesIsEnabled) ExceptionTimer->startTimer(); this->MF = MF; shouldEmitTable = shouldEmitMoves = false; - if (MMI && TAI->doesSupportExceptionHandling()) { - // Map all labels and get rid of any dead landing pads. - MMI->TidyLandingPads(); + // Map all labels and get rid of any dead landing pads. + MMI->TidyLandingPads(); - // If any landing pads survive, we need an EH table. - if (MMI->getLandingPads().size()) - shouldEmitTable = true; + // If any landing pads survive, we need an EH table. + if (!MMI->getLandingPads().empty()) + shouldEmitTable = true; - // See if we need frame move info. - if (!MF->getFunction()->doesNotThrow() || UnwindTablesMandatory) - shouldEmitMoves = true; + // See if we need frame move info. + if (!MF->getFunction()->doesNotThrow() || UnwindTablesMandatory) + shouldEmitMoves = true; - if (shouldEmitMoves || shouldEmitTable) - // Assumes in correct section after the entry point. - EmitLabel("eh_func_begin", ++SubprogramCount); - } + if (shouldEmitMoves || shouldEmitTable) + // Assumes in correct section after the entry point. + EmitLabel("eh_func_begin", ++SubprogramCount); shouldEmitTableModule |= shouldEmitTable; shouldEmitMovesModule |= shouldEmitMoves; @@ -682,25 +979,29 @@ void DwarfException::BeginFunction(MachineFunction *MF) { /// EndFunction - Gather and emit post-function exception information. 
/// void DwarfException::EndFunction() { - if (TimePassesIsEnabled) + if (!shouldEmitMoves && !shouldEmitTable) return; + + if (TimePassesIsEnabled) ExceptionTimer->startTimer(); - if (shouldEmitMoves || shouldEmitTable) { - EmitLabel("eh_func_end", SubprogramCount); - EmitExceptionTable(); - - // Save EH frame information - std::string Name; - EHFrames.push_back( - FunctionEHFrameInfo(getAsm()->getCurrentFunctionEHName(MF, Name), - SubprogramCount, - MMI->getPersonalityIndex(), - MF->getFrameInfo()->hasCalls(), - !MMI->getLandingPads().empty(), - MMI->getFrameMoves(), - MF->getFunction())); - } + EmitLabel("eh_func_end", SubprogramCount); + EmitExceptionTable(); - if (TimePassesIsEnabled) + std::string FunctionEHName = + Asm->Mang->getMangledName(MF->getFunction(), ".eh", + Asm->MAI->is_EHSymbolPrivate()); + + // Save EH frame information + EHFrames.push_back(FunctionEHFrameInfo(FunctionEHName, SubprogramCount, + MMI->getPersonalityIndex(), + MF->getFrameInfo()->hasCalls(), + !MMI->getLandingPads().empty(), + MMI->getFrameMoves(), + MF->getFunction())); + + // Record if this personality index uses a landing pad. 
+ UsesLSDA[MMI->getPersonalityIndex()] |= !MMI->getLandingPads().empty(); + + if (TimePassesIsEnabled) ExceptionTimer->stopTimer(); } diff --git a/lib/CodeGen/AsmPrinter/DwarfException.h b/lib/CodeGen/AsmPrinter/DwarfException.h index f1c3e56..f6f5025 100644 --- a/lib/CodeGen/AsmPrinter/DwarfException.h +++ b/lib/CodeGen/AsmPrinter/DwarfException.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef CODEGEN_ASMPRINTER_DWARFEXCEPTION_H__ -#define CODEGEN_ASMPRINTER_DWARFEXCEPTION_H__ +#ifndef LLVM_CODEGEN_ASMPRINTER_DWARFEXCEPTION_H +#define LLVM_CODEGEN_ASMPRINTER_DWARFEXCEPTION_H #include "DIE.h" #include "DwarfPrinter.h" @@ -24,7 +24,7 @@ namespace llvm { struct LandingPadInfo; class MachineModuleInfo; -class TargetAsmInfo; +class MCAsmInfo; class Timer; class raw_ostream; @@ -51,6 +51,11 @@ class VISIBILITY_HIDDEN DwarfException : public Dwarf { std::vector<FunctionEHFrameInfo> EHFrames; + /// UsesLSDA - Indicates whether an FDE that uses the CIE at the given index + /// uses an LSDA. If so, then we need to encode that information in the CIE's + /// augmentation. + DenseMap<unsigned, bool> UsesLSDA; + /// shouldEmitTable - Per-function flag to indicate if EH tables should /// be emitted. bool shouldEmitTable; @@ -70,13 +75,16 @@ class VISIBILITY_HIDDEN DwarfException : public Dwarf { /// ExceptionTimer - Timer for the Dwarf exception writer. Timer *ExceptionTimer; - /// EmitCommonEHFrame - Emit the common eh unwind frame. - /// - void EmitCommonEHFrame(const Function *Personality, unsigned Index); + /// SizeOfEncodedValue - Return the size of the encoding in bytes. + unsigned SizeOfEncodedValue(unsigned Encoding); - /// EmitEHFrame - Emit function exception frame information. - /// - void EmitEHFrame(const FunctionEHFrameInfo &EHFrameInfo); + /// EmitCIE - Emit a Common Information Entry (CIE). This holds information + /// that is shared among many Frame Description Entries. 
There is at least + /// one CIE in every non-empty .debug_frame section. + void EmitCIE(const Function *Personality, unsigned Index); + + /// EmitFDE - Emit the Frame Description Entry (FDE) for the function. + void EmitFDE(const FunctionEHFrameInfo &EHFrameInfo); /// EmitExceptionTable - Emit landing pads and actions. /// @@ -113,13 +121,6 @@ class VISIBILITY_HIDDEN DwarfException : public Dwarf { static bool isPod() { return true; } }; - /// ActionEntry - Structure describing an entry in the actions table. - struct ActionEntry { - int ValueForTypeID; // The value to write - may not be equal to the type id. - int NextAction; - struct ActionEntry *Previous; - }; - /// PadRange - Structure holding a try-range and the associated landing pad. struct PadRange { // The index of the landing pad. @@ -130,23 +131,48 @@ class VISIBILITY_HIDDEN DwarfException : public Dwarf { typedef DenseMap<unsigned, PadRange, KeyInfo> RangeMapType; + /// ActionEntry - Structure describing an entry in the actions table. + struct ActionEntry { + int ValueForTypeID; // The value to write - may not be equal to the type id. + int NextAction; + struct ActionEntry *Previous; + }; + /// CallSiteEntry - Structure describing an entry in the call-site table. struct CallSiteEntry { // The 'try-range' is BeginLabel .. EndLabel. unsigned BeginLabel; // zero indicates the start of the function. unsigned EndLabel; // zero indicates the end of the function. + // The landing pad starts at PadLabel. unsigned PadLabel; // zero indicates that there is no landing pad. unsigned Action; }; + /// ComputeActionsTable - Compute the actions table and gather the first + /// action index for each landing pad site. + unsigned ComputeActionsTable(const SmallVectorImpl<const LandingPadInfo*>&LPs, + SmallVectorImpl<ActionEntry> &Actions, + SmallVectorImpl<unsigned> &FirstActions); + + /// ComputeCallSiteTable - Compute the call-site table. 
The entry for an + /// invoke has a try-range containing the call, a non-zero landing pad and an + /// appropriate action. The entry for an ordinary call has a try-range + /// containing the call and zero for the landing pad and the action. Calls + /// marked 'nounwind' have no entry and must not be contained in the try-range + /// of any entry - they form gaps in the table. Entries must be ordered by + /// try-range address. + void ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites, + const RangeMapType &PadMap, + const SmallVectorImpl<const LandingPadInfo *> &LPs, + const SmallVectorImpl<unsigned> &FirstActions); void EmitExceptionTable(); public: //===--------------------------------------------------------------------===// // Main entry points. // - DwarfException(raw_ostream &OS, AsmPrinter *A, const TargetAsmInfo *T); + DwarfException(raw_ostream &OS, AsmPrinter *A, const MCAsmInfo *T); virtual ~DwarfException(); /// BeginModule - Emit all exception information that should come prior to the diff --git a/lib/CodeGen/AsmPrinter/DwarfLabel.cpp b/lib/CodeGen/AsmPrinter/DwarfLabel.cpp index 8021b7c..6e9293a 100644 --- a/lib/CodeGen/AsmPrinter/DwarfLabel.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfLabel.cpp @@ -13,7 +13,7 @@ #include "DwarfLabel.h" #include "llvm/ADT/FoldingSet.h" -#include <ostream> +#include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -25,10 +25,7 @@ void DWLabel::Profile(FoldingSetNodeID &ID) const { } #ifndef NDEBUG -void DWLabel::print(std::ostream *O) const { - if (O) print(*O); -} -void DWLabel::print(std::ostream &O) const { +void DWLabel::print(raw_ostream &O) const { O << "." 
<< Tag; if (Number) O << Number; } diff --git a/lib/CodeGen/AsmPrinter/DwarfLabel.h b/lib/CodeGen/AsmPrinter/DwarfLabel.h index b493903..0c0cc4b 100644 --- a/lib/CodeGen/AsmPrinter/DwarfLabel.h +++ b/lib/CodeGen/AsmPrinter/DwarfLabel.h @@ -14,19 +14,16 @@ #ifndef CODEGEN_ASMPRINTER_DWARFLABEL_H__ #define CODEGEN_ASMPRINTER_DWARFLABEL_H__ -#include "llvm/Support/Compiler.h" -#include <iosfwd> -#include <vector> - namespace llvm { class FoldingSetNodeID; + class raw_ostream; //===--------------------------------------------------------------------===// /// DWLabel - Labels are used to track locations in the assembler file. /// Labels appear in the form @verbatim <prefix><Tag><Number> @endverbatim, /// where the tag is a category of label (Ex. location) and number is a value /// unique in that category. - class VISIBILITY_HIDDEN DWLabel { + class DWLabel { /// Tag - Label category tag. Should always be a statically declared C /// string. /// @@ -47,8 +44,7 @@ namespace llvm { void Profile(FoldingSetNodeID &ID) const; #ifndef NDEBUG - void print(std::ostream *O) const; - void print(std::ostream &O) const; + void print(raw_ostream &O) const; #endif }; } // end llvm namespace diff --git a/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp b/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp index a1b97df..20b959b 100644 --- a/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp @@ -15,39 +15,41 @@ #include "llvm/Module.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/Support/Dwarf.h" -#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetFrameInfo.h" #include "llvm/Target/TargetRegisterInfo.h" - +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/ADT/StringExtras.h" using namespace llvm; 
-Dwarf::Dwarf(raw_ostream &OS, AsmPrinter *A, const TargetAsmInfo *T, +Dwarf::Dwarf(raw_ostream &OS, AsmPrinter *A, const MCAsmInfo *T, const char *flavor) -: O(OS), Asm(A), TAI(T), TD(Asm->TM.getTargetData()), +: O(OS), Asm(A), MAI(T), TD(Asm->TM.getTargetData()), RI(Asm->TM.getRegisterInfo()), M(NULL), MF(NULL), MMI(NULL), SubprogramCount(0), Flavor(flavor), SetCounter(1) {} void Dwarf::PrintRelDirective(bool Force32Bit, bool isInSection) const { - if (isInSection && TAI->getDwarfSectionOffsetDirective()) - O << TAI->getDwarfSectionOffsetDirective(); + if (isInSection && MAI->getDwarfSectionOffsetDirective()) + O << MAI->getDwarfSectionOffsetDirective(); else if (Force32Bit || TD->getPointerSize() == sizeof(int32_t)) - O << TAI->getData32bitsDirective(); + O << MAI->getData32bitsDirective(); else - O << TAI->getData64bitsDirective(); + O << MAI->getData64bitsDirective(); } /// PrintLabelName - Print label name in form used by Dwarf writer. /// void Dwarf::PrintLabelName(const char *Tag, unsigned Number) const { - O << TAI->getPrivateGlobalPrefix() << Tag; + O << MAI->getPrivateGlobalPrefix() << Tag; if (Number) O << Number; } void Dwarf::PrintLabelName(const char *Tag, unsigned Number, const char *Suffix) const { - O << TAI->getPrivateGlobalPrefix() << Tag; + O << MAI->getPrivateGlobalPrefix() << Tag; if (Number) O << Number; O << Suffix; } @@ -65,13 +67,13 @@ void Dwarf::EmitReference(const char *Tag, unsigned Number, bool IsPCRelative, bool Force32Bit) const { PrintRelDirective(Force32Bit); PrintLabelName(Tag, Number); - if (IsPCRelative) O << "-" << TAI->getPCSymbol(); + if (IsPCRelative) O << "-" << MAI->getPCSymbol(); } void Dwarf::EmitReference(const std::string &Name, bool IsPCRelative, bool Force32Bit) const { PrintRelDirective(Force32Bit); O << Name; - if (IsPCRelative) O << "-" << TAI->getPCSymbol(); + if (IsPCRelative) O << "-" << MAI->getPCSymbol(); } /// EmitDifference - Emit the difference between two labels. 
Some assemblers do @@ -80,7 +82,7 @@ void Dwarf::EmitReference(const std::string &Name, bool IsPCRelative, void Dwarf::EmitDifference(const char *TagHi, unsigned NumberHi, const char *TagLo, unsigned NumberLo, bool IsSmall) { - if (TAI->needsSet()) { + if (MAI->needsSet()) { O << "\t.set\t"; PrintLabelName("set", SetCounter, Flavor); O << ","; @@ -106,11 +108,11 @@ void Dwarf::EmitSectionOffset(const char* Label, const char* Section, bool useSet) { bool printAbsolute = false; if (isEH) - printAbsolute = TAI->isAbsoluteEHSectionOffsets(); + printAbsolute = MAI->isAbsoluteEHSectionOffsets(); else - printAbsolute = TAI->isAbsoluteDebugSectionOffsets(); + printAbsolute = MAI->isAbsoluteDebugSectionOffsets(); - if (TAI->needsSet() && useSet) { + if (MAI->needsSet() && useSet) { O << "\t.set\t"; PrintLabelName("set", SetCounter, Flavor); O << ","; @@ -190,7 +192,7 @@ void Dwarf::EmitFrameMoves(const char *BaseLabel, unsigned BaseLabelID, Asm->EmitULEB128Bytes(Offset); Asm->EOL("Offset"); } else { - assert(0 && "Machine move not supported yet."); + llvm_unreachable("Machine move not supported yet."); } } else if (Src.isReg() && Src.getReg() == MachineLocation::VirtualFP) { @@ -200,7 +202,7 @@ void Dwarf::EmitFrameMoves(const char *BaseLabel, unsigned BaseLabelID, Asm->EmitULEB128Bytes(RI->getDwarfRegNum(Dst.getReg(), isEH)); Asm->EOL("Register"); } else { - assert(0 && "Machine move not supported yet."); + llvm_unreachable("Machine move not supported yet."); } } else { unsigned Reg = RI->getDwarfRegNum(Src.getReg(), isEH); diff --git a/lib/CodeGen/AsmPrinter/DwarfPrinter.h b/lib/CodeGen/AsmPrinter/DwarfPrinter.h index 6e75992..33ebb3b 100644 --- a/lib/CodeGen/AsmPrinter/DwarfPrinter.h +++ b/lib/CodeGen/AsmPrinter/DwarfPrinter.h @@ -25,7 +25,7 @@ namespace llvm { class MachineFunction; class MachineModuleInfo; class Module; - class TargetAsmInfo; + class MCAsmInfo; class TargetData; class TargetRegisterInfo; @@ -43,9 +43,9 @@ namespace llvm { /// AsmPrinter *Asm; - /// TAI 
- Target asm information. + /// MAI - Target asm information. /// - const TargetAsmInfo *TAI; + const MCAsmInfo *MAI; /// TD - Target data. /// @@ -80,7 +80,7 @@ namespace llvm { /// unsigned SetCounter; - Dwarf(raw_ostream &OS, AsmPrinter *A, const TargetAsmInfo *T, + Dwarf(raw_ostream &OS, AsmPrinter *A, const MCAsmInfo *T, const char *flavor); public: //===------------------------------------------------------------------===// @@ -88,7 +88,7 @@ namespace llvm { // const AsmPrinter *getAsm() const { return Asm; } MachineModuleInfo *getMMI() const { return MMI; } - const TargetAsmInfo *getTargetAsmInfo() const { return TAI; } + const MCAsmInfo *getMCAsmInfo() const { return MAI; } const TargetData *getTargetData() const { return TD; } void PrintRelDirective(bool Force32Bit = false, diff --git a/lib/CodeGen/AsmPrinter/DwarfWriter.cpp b/lib/CodeGen/AsmPrinter/DwarfWriter.cpp index 89084989..0638d35 100644 --- a/lib/CodeGen/AsmPrinter/DwarfWriter.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfWriter.cpp @@ -39,7 +39,7 @@ DwarfWriter::~DwarfWriter() { void DwarfWriter::BeginModule(Module *M, MachineModuleInfo *MMI, raw_ostream &OS, AsmPrinter *A, - const TargetAsmInfo *T) { + const MCAsmInfo *T) { DE = new DwarfException(OS, A, T); DD = new DwarfDebug(OS, A, T); DE->BeginModule(M, MMI); @@ -51,6 +51,8 @@ void DwarfWriter::BeginModule(Module *M, void DwarfWriter::EndModule() { DE->EndModule(); DD->EndModule(); + delete DD; DD = 0; + delete DE; DE = 0; } /// BeginFunction - Gather pre-function debug information. Assumes being @@ -75,18 +77,18 @@ void DwarfWriter::EndFunction(MachineFunction *MF) { /// label. Returns a unique label ID used to generate a label and provide /// correspondence to the source line list. unsigned DwarfWriter::RecordSourceLine(unsigned Line, unsigned Col, - DICompileUnit CU) { - return DD->RecordSourceLine(Line, Col, CU); + MDNode *Scope) { + return DD->RecordSourceLine(Line, Col, Scope); } /// RecordRegionStart - Indicate the start of a region. 
-unsigned DwarfWriter::RecordRegionStart(GlobalVariable *V) { - return DD->RecordRegionStart(V); +unsigned DwarfWriter::RecordRegionStart(MDNode *N) { + return DD->RecordRegionStart(N); } /// RecordRegionEnd - Indicate the end of a region. -unsigned DwarfWriter::RecordRegionEnd(GlobalVariable *V) { - return DD->RecordRegionEnd(V); +unsigned DwarfWriter::RecordRegionEnd(MDNode *N) { + return DD->RecordRegionEnd(N); } /// getRecordSourceLineCount - Count source lines. @@ -96,9 +98,8 @@ unsigned DwarfWriter::getRecordSourceLineCount() { /// RecordVariable - Indicate the declaration of a local variable. /// -void DwarfWriter::RecordVariable(GlobalVariable *GV, unsigned FrameIndex, - const MachineInstr *MI) { - DD->RecordVariable(GV, FrameIndex, MI); +void DwarfWriter::RecordVariable(MDNode *N, unsigned FrameIndex) { + DD->RecordVariable(N, FrameIndex); } /// ShouldEmitDwarfDebug - Returns true if Dwarf debugging declarations should @@ -107,8 +108,7 @@ bool DwarfWriter::ShouldEmitDwarfDebug() const { return DD && DD->ShouldEmitDwarfDebug(); } -//// RecordInlinedFnStart - Global variable GV is inlined at the location marked -//// by LabelID label. +//// RecordInlinedFnStart unsigned DwarfWriter::RecordInlinedFnStart(DISubprogram SP, DICompileUnit CU, unsigned Line, unsigned Col) { return DD->RecordInlinedFnStart(SP, CU, Line, Col); @@ -119,9 +119,9 @@ unsigned DwarfWriter::RecordInlinedFnEnd(DISubprogram SP) { return DD->RecordInlinedFnEnd(SP); } -/// RecordVariableScope - Record scope for the variable declared by -/// DeclareMI. DeclareMI must describe TargetInstrInfo::DECLARE. 
-void DwarfWriter::RecordVariableScope(DIVariable &DV, - const MachineInstr *DeclareMI) { - DD->RecordVariableScope(DV, DeclareMI); +void DwarfWriter::SetDbgScopeBeginLabels(const MachineInstr *MI, unsigned L) { + DD->SetDbgScopeEndLabels(MI, L); +} +void DwarfWriter::SetDbgScopeEndLabels(const MachineInstr *MI, unsigned L) { + DD->SetDbgScopeBeginLabels(MI, L); } diff --git a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp index 8ba903a..06b92b7 100644 --- a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp @@ -15,12 +15,14 @@ #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/GCMetadataPrinter.h" #include "llvm/Module.h" -#include "llvm/Support/Compiler.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" - +#include "llvm/Support/Compiler.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; namespace { @@ -28,10 +30,10 @@ namespace { class VISIBILITY_HIDDEN OcamlGCMetadataPrinter : public GCMetadataPrinter { public: void beginAssembly(raw_ostream &OS, AsmPrinter &AP, - const TargetAsmInfo &TAI); + const MCAsmInfo &MAI); void finishAssembly(raw_ostream &OS, AsmPrinter &AP, - const TargetAsmInfo &TAI); + const MCAsmInfo &MAI); }; } @@ -42,11 +44,11 @@ Y("ocaml", "ocaml 3.10-compatible collector"); void llvm::linkOcamlGCPrinter() { } static void EmitCamlGlobal(const Module &M, raw_ostream &OS, AsmPrinter &AP, - const TargetAsmInfo &TAI, const char *Id) { + const MCAsmInfo &MAI, const char *Id) { const std::string &MId = M.getModuleIdentifier(); std::string Mangled; - Mangled += TAI.getGlobalPrefix(); + Mangled += MAI.getGlobalPrefix(); Mangled += "caml"; size_t Letter = Mangled.size(); Mangled.append(MId.begin(), 
std::find(MId.begin(), MId.end(), '.')); @@ -56,18 +58,18 @@ static void EmitCamlGlobal(const Module &M, raw_ostream &OS, AsmPrinter &AP, // Capitalize the first letter of the module name. Mangled[Letter] = toupper(Mangled[Letter]); - if (const char *GlobalDirective = TAI.getGlobalDirective()) + if (const char *GlobalDirective = MAI.getGlobalDirective()) OS << GlobalDirective << Mangled << "\n"; OS << Mangled << ":\n"; } void OcamlGCMetadataPrinter::beginAssembly(raw_ostream &OS, AsmPrinter &AP, - const TargetAsmInfo &TAI) { - AP.SwitchToSection(TAI.getTextSection()); - EmitCamlGlobal(getModule(), OS, AP, TAI, "code_begin"); + const MCAsmInfo &MAI) { + AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getTextSection()); + EmitCamlGlobal(getModule(), OS, AP, MAI, "code_begin"); - AP.SwitchToSection(TAI.getDataSection()); - EmitCamlGlobal(getModule(), OS, AP, TAI, "data_begin"); + AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getDataSection()); + EmitCamlGlobal(getModule(), OS, AP, MAI, "data_begin"); } /// emitAssembly - Print the frametable. The ocaml frametable format is thus: @@ -87,55 +89,59 @@ void OcamlGCMetadataPrinter::beginAssembly(raw_ostream &OS, AsmPrinter &AP, /// either condition is detected in a function which uses the GC. 
/// void OcamlGCMetadataPrinter::finishAssembly(raw_ostream &OS, AsmPrinter &AP, - const TargetAsmInfo &TAI) { + const MCAsmInfo &MAI) { const char *AddressDirective; int AddressAlignLog; if (AP.TM.getTargetData()->getPointerSize() == sizeof(int32_t)) { - AddressDirective = TAI.getData32bitsDirective(); + AddressDirective = MAI.getData32bitsDirective(); AddressAlignLog = 2; } else { - AddressDirective = TAI.getData64bitsDirective(); + AddressDirective = MAI.getData64bitsDirective(); AddressAlignLog = 3; } - AP.SwitchToSection(TAI.getTextSection()); - EmitCamlGlobal(getModule(), OS, AP, TAI, "code_end"); + AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getTextSection()); + EmitCamlGlobal(getModule(), OS, AP, MAI, "code_end"); - AP.SwitchToSection(TAI.getDataSection()); - EmitCamlGlobal(getModule(), OS, AP, TAI, "data_end"); + AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getDataSection()); + EmitCamlGlobal(getModule(), OS, AP, MAI, "data_end"); OS << AddressDirective << 0; // FIXME: Why does ocaml emit this?? AP.EOL(); - AP.SwitchToSection(TAI.getDataSection()); - EmitCamlGlobal(getModule(), OS, AP, TAI, "frametable"); + AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getDataSection()); + EmitCamlGlobal(getModule(), OS, AP, MAI, "frametable"); for (iterator I = begin(), IE = end(); I != IE; ++I) { GCFunctionInfo &FI = **I; uint64_t FrameSize = FI.getFrameSize(); if (FrameSize >= 1<<16) { - cerr << "Function '" << FI.getFunction().getNameStart() + std::string msg; + raw_string_ostream Msg(msg); + Msg << "Function '" << FI.getFunction().getName() << "' is too large for the ocaml GC! " << "Frame size " << FrameSize << " >= 65536.\n"; - cerr << "(" << uintptr_t(&FI) << ")\n"; - abort(); // Very rude! + Msg << "(" << uintptr_t(&FI) << ")"; + llvm_report_error(Msg.str()); // Very rude! 
} - OS << "\t" << TAI.getCommentString() << " live roots for " - << FI.getFunction().getNameStart() << "\n"; + OS << "\t" << MAI.getCommentString() << " live roots for " + << FI.getFunction().getName() << "\n"; for (GCFunctionInfo::iterator J = FI.begin(), JE = FI.end(); J != JE; ++J) { size_t LiveCount = FI.live_size(J); if (LiveCount >= 1<<16) { - cerr << "Function '" << FI.getFunction().getNameStart() + std::string msg; + raw_string_ostream Msg(msg); + Msg << "Function '" << FI.getFunction().getName() << "' is too large for the ocaml GC! " - << "Live root count " << LiveCount << " >= 65536.\n"; - abort(); // Very rude! + << "Live root count " << LiveCount << " >= 65536."; + llvm_report_error(Msg.str()); // Very rude! } OS << AddressDirective - << TAI.getPrivateGlobalPrefix() << "label" << J->Num; + << MAI.getPrivateGlobalPrefix() << "label" << J->Num; AP.EOL("call return address"); AP.EmitInt16(FrameSize); diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index 2635303..f9abeac 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -17,6 +17,7 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "branchfolding" +#include "BranchFolding.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -27,6 +28,8 @@ #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" @@ -44,70 +47,35 @@ TailMergeThreshold("tail-merge-threshold", cl::desc("Max number of predecessors to consider tail merging"), cl::init(150), cl::Hidden); -namespace { - struct VISIBILITY_HIDDEN BranchFolder : public MachineFunctionPass { - static char ID; - explicit BranchFolder(bool 
defaultEnableTailMerge) : - MachineFunctionPass(&ID) { - switch (FlagEnableTailMerge) { - case cl::BOU_UNSET: EnableTailMerge = defaultEnableTailMerge; break; - case cl::BOU_TRUE: EnableTailMerge = true; break; - case cl::BOU_FALSE: EnableTailMerge = false; break; - } - } - virtual bool runOnMachineFunction(MachineFunction &MF); - virtual const char *getPassName() const { return "Control Flow Optimizer"; } - const TargetInstrInfo *TII; - MachineModuleInfo *MMI; - bool MadeChange; - private: - // Tail Merging. - bool EnableTailMerge; - bool TailMergeBlocks(MachineFunction &MF); - bool TryMergeBlocks(MachineBasicBlock* SuccBB, - MachineBasicBlock* PredBB); - void ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst, - MachineBasicBlock *NewDest); - MachineBasicBlock *SplitMBBAt(MachineBasicBlock &CurMBB, - MachineBasicBlock::iterator BBI1); - unsigned ComputeSameTails(unsigned CurHash, unsigned minCommonTailLength); - void RemoveBlocksWithHash(unsigned CurHash, MachineBasicBlock* SuccBB, - MachineBasicBlock* PredBB); - unsigned CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB, - unsigned maxCommonTailLength); - - typedef std::pair<unsigned,MachineBasicBlock*> MergePotentialsElt; - typedef std::vector<MergePotentialsElt>::iterator MPIterator; - std::vector<MergePotentialsElt> MergePotentials; - - typedef std::pair<MPIterator, MachineBasicBlock::iterator> SameTailElt; - std::vector<SameTailElt> SameTails; - - const TargetRegisterInfo *RegInfo; - RegScavenger *RS; - // Branch optzn. 
- bool OptimizeBranches(MachineFunction &MF); - void OptimizeBlock(MachineBasicBlock *MBB); - void RemoveDeadBlock(MachineBasicBlock *MBB); - bool OptimizeImpDefsBlock(MachineBasicBlock *MBB); - - bool CanFallThrough(MachineBasicBlock *CurBB); - bool CanFallThrough(MachineBasicBlock *CurBB, bool BranchUnAnalyzable, - MachineBasicBlock *TBB, MachineBasicBlock *FBB, - const SmallVectorImpl<MachineOperand> &Cond); - }; - char BranchFolder::ID = 0; -} +char BranchFolderPass::ID = 0; FunctionPass *llvm::createBranchFoldingPass(bool DefaultEnableTailMerge) { - return new BranchFolder(DefaultEnableTailMerge); } + return new BranchFolderPass(DefaultEnableTailMerge); +} + +bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) { + return OptimizeFunction(MF, + MF.getTarget().getInstrInfo(), + MF.getTarget().getRegisterInfo(), + getAnalysisIfAvailable<MachineModuleInfo>()); +} + + + +BranchFolder::BranchFolder(bool defaultEnableTailMerge) { + switch (FlagEnableTailMerge) { + case cl::BOU_UNSET: EnableTailMerge = defaultEnableTailMerge; break; + case cl::BOU_TRUE: EnableTailMerge = true; break; + case cl::BOU_FALSE: EnableTailMerge = false; break; + } +} /// RemoveDeadBlock - Remove the specified dead machine basic block from the /// function, updating the CFG. void BranchFolder::RemoveDeadBlock(MachineBasicBlock *MBB) { assert(MBB->pred_empty() && "MBB must be dead!"); - DOUT << "\nRemoving MBB: " << *MBB; + DEBUG(errs() << "\nRemoving MBB: " << *MBB); MachineFunction *MF = MBB->getParent(); // drop all successors. 
@@ -146,7 +114,7 @@ bool BranchFolder::OptimizeImpDefsBlock(MachineBasicBlock *MBB) { break; unsigned Reg = I->getOperand(0).getReg(); ImpDefRegs.insert(Reg); - for (const unsigned *SubRegs = RegInfo->getSubRegisters(Reg); + for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); unsigned SubReg = *SubRegs; ++SubRegs) ImpDefRegs.insert(SubReg); ++I; @@ -180,32 +148,37 @@ bool BranchFolder::OptimizeImpDefsBlock(MachineBasicBlock *MBB) { return true; } -bool BranchFolder::runOnMachineFunction(MachineFunction &MF) { - TII = MF.getTarget().getInstrInfo(); - if (!TII) return false; +/// OptimizeFunction - Perhaps branch folding, tail merging and other +/// CFG optimizations on the given function. +bool BranchFolder::OptimizeFunction(MachineFunction &MF, + const TargetInstrInfo *tii, + const TargetRegisterInfo *tri, + MachineModuleInfo *mmi) { + if (!tii) return false; + + TII = tii; + TRI = tri; + MMI = mmi; - RegInfo = MF.getTarget().getRegisterInfo(); + RS = TRI->requiresRegisterScavenging(MF) ? new RegScavenger() : NULL; // Fix CFG. The later algorithms expect it to be right. - bool EverMadeChange = false; + bool MadeChange = false; for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; I++) { MachineBasicBlock *MBB = I, *TBB = 0, *FBB = 0; SmallVector<MachineOperand, 4> Cond; if (!TII->AnalyzeBranch(*MBB, TBB, FBB, Cond, true)) - EverMadeChange |= MBB->CorrectExtraCFGEdges(TBB, FBB, !Cond.empty()); - EverMadeChange |= OptimizeImpDefsBlock(MBB); + MadeChange |= MBB->CorrectExtraCFGEdges(TBB, FBB, !Cond.empty()); + MadeChange |= OptimizeImpDefsBlock(MBB); } - RS = RegInfo->requiresRegisterScavenging(MF) ? 
new RegScavenger() : NULL; - - MMI = getAnalysisIfAvailable<MachineModuleInfo>(); bool MadeChangeThisIteration = true; while (MadeChangeThisIteration) { MadeChangeThisIteration = false; MadeChangeThisIteration |= TailMergeBlocks(MF); MadeChangeThisIteration |= OptimizeBranches(MF); - EverMadeChange |= MadeChangeThisIteration; + MadeChange |= MadeChangeThisIteration; } // See if any jump tables have become mergable or dead as the code generator @@ -222,8 +195,12 @@ bool BranchFolder::runOnMachineFunction(MachineFunction &MF) { // Scan the jump tables, seeing if there are any duplicates. Note that this // is N^2, which should be fixed someday. - for (unsigned i = 1, e = JTs.size(); i != e; ++i) - JTMapping.push_back(JTI->getJumpTableIndex(JTs[i].MBBs)); + for (unsigned i = 1, e = JTs.size(); i != e; ++i) { + if (JTs[i].MBBs.empty()) + JTMapping.push_back(i); + else + JTMapping.push_back(JTI->getJumpTableIndex(JTs[i].MBBs)); + } // If a jump table was merge with another one, walk the function rewriting // references to jump tables to reference the new JT ID's. 
Keep track of @@ -250,12 +227,12 @@ bool BranchFolder::runOnMachineFunction(MachineFunction &MF) { for (unsigned i = 0, e = JTIsLive.size(); i != e; ++i) if (!JTIsLive.test(i)) { JTI->RemoveJumpTable(i); - EverMadeChange = true; + MadeChange = true; } } - + delete RS; - return EverMadeChange; + return MadeChange; } //===----------------------------------------------------------------------===// @@ -395,9 +372,9 @@ MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB, RS->enterBasicBlock(&CurMBB); if (!CurMBB.empty()) RS->forward(prior(CurMBB.end())); - BitVector RegsLiveAtExit(RegInfo->getNumRegs()); + BitVector RegsLiveAtExit(TRI->getNumRegs()); RS->getRegsUsed(RegsLiveAtExit, false); - for (unsigned int i=0, e=RegInfo->getNumRegs(); i!=e; i++) + for (unsigned int i=0, e=TRI->getNumRegs(); i!=e; i++) if (RegsLiveAtExit[i]) NewMBB->addLiveIn(i); } @@ -461,7 +438,7 @@ static bool MergeCompare(const std::pair<unsigned,MachineBasicBlock*> &p, // _GLIBCXX_DEBUG checks strict weak ordering, which involves comparing // an object with itself. 
#ifndef _GLIBCXX_DEBUG - assert(0 && "Predecessor appears twice"); + llvm_unreachable("Predecessor appears twice"); #endif return false; } @@ -567,8 +544,8 @@ unsigned BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB, MachineBasicBlock::iterator BBI = SameTails[commonTailIndex].second; MachineBasicBlock *MBB = SameTails[commonTailIndex].first->second; - DOUT << "\nSplitting " << MBB->getNumber() << ", size " << - maxCommonTailLength; + DEBUG(errs() << "\nSplitting " << MBB->getNumber() << ", size " + << maxCommonTailLength); MachineBasicBlock *newMBB = SplitMBBAt(*MBB, BBI); SameTails[commonTailIndex].first->second = newMBB; @@ -590,13 +567,14 @@ unsigned BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB, bool BranchFolder::TryMergeBlocks(MachineBasicBlock *SuccBB, MachineBasicBlock* PredBB) { + bool MadeChange = false; + // It doesn't make sense to save a single instruction since tail merging // will add a jump. // FIXME: Ask the target to provide the threshold? unsigned minCommonTailLength = (SuccBB ? 1 : 2) + 1; - MadeChange = false; - DOUT << "\nTryMergeBlocks " << MergePotentials.size() << '\n'; + DEBUG(errs() << "\nTryMergeBlocks " << MergePotentials.size() << '\n'); // Sort by hash value so that blocks with identical end sequences sort // together. @@ -643,17 +621,17 @@ bool BranchFolder::TryMergeBlocks(MachineBasicBlock *SuccBB, MachineBasicBlock *MBB = SameTails[commonTailIndex].first->second; // MBB is common tail. Adjust all other BB's to jump to this one. // Traversal must be forwards so erases work. - DOUT << "\nUsing common tail " << MBB->getNumber() << " for "; + DEBUG(errs() << "\nUsing common tail " << MBB->getNumber() << " for "); for (unsigned int i=0; i<SameTails.size(); ++i) { if (commonTailIndex==i) continue; - DOUT << SameTails[i].first->second->getNumber() << ","; + DEBUG(errs() << SameTails[i].first->second->getNumber() << ","); // Hack the end off BB i, making it jump to BB commonTailIndex instead. 
ReplaceTailWithBranchTo(SameTails[i].second, MBB); // BB i is no longer a predecessor of SuccBB; remove it from the worklist. MergePotentials.erase(SameTails[i].first); } - DOUT << "\n"; + DEBUG(errs() << "\n"); // We leave commonTailIndex in the worklist in case there are other blocks // that match it with a smaller number of instructions. MadeChange = true; @@ -665,7 +643,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { if (!EnableTailMerge) return false; - MadeChange = false; + bool MadeChange = false; // First find blocks with no successors. MergePotentials.clear(); @@ -699,6 +677,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { if (I->pred_size() >= 2 && I->pred_size() < TailMergeThreshold) { + SmallPtrSet<MachineBasicBlock *, 8> UniquePreds; MachineBasicBlock *IBB = I; MachineBasicBlock *PredBB = prior(I); MergePotentials.clear(); @@ -709,6 +688,9 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { // Skip blocks that loop to themselves, can't tail merge these. if (PBB==IBB) continue; + // Visit each predecessor only once. + if (!UniquePreds.insert(PBB)) + continue; MachineBasicBlock *TBB = 0, *FBB = 0; SmallVector<MachineOperand, 4> Cond; if (!TII->AnalyzeBranch(*PBB, TBB, FBB, Cond, true)) { @@ -772,14 +754,14 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { //===----------------------------------------------------------------------===// bool BranchFolder::OptimizeBranches(MachineFunction &MF) { - MadeChange = false; + bool MadeChange = false; // Make sure blocks are numbered in order MF.RenumberBlocks(); for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ) { MachineBasicBlock *MBB = I++; - OptimizeBlock(MBB); + MadeChange |= OptimizeBlock(MBB); // If it is dead, remove it. 
if (MBB->pred_empty()) { @@ -873,7 +855,9 @@ static bool IsBetterFallthrough(MachineBasicBlock *MBB1, /// OptimizeBlock - Analyze and optimize control flow related to the specified /// block. This is never called on the entry block. -void BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { +bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { + bool MadeChange = false; + MachineFunction::iterator FallThrough = MBB; ++FallThrough; @@ -882,7 +866,7 @@ void BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { // points to this block. if (MBB->empty() && !MBB->isLandingPad()) { // Dead block? Leave for cleanup later. - if (MBB->pred_empty()) return; + if (MBB->pred_empty()) return MadeChange; if (FallThrough == MBB->getParent()->end()) { // TODO: Simplify preds to not branch here if possible! @@ -893,14 +877,13 @@ void BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { MachineBasicBlock *Pred = *(MBB->pred_end()-1); Pred->ReplaceUsesOfBlockWith(MBB, FallThrough); } - // If MBB was the target of a jump table, update jump tables to go to the // fallthrough instead. MBB->getParent()->getJumpTableInfo()-> ReplaceMBBInJumpTables(MBB, FallThrough); MadeChange = true; } - return; + return MadeChange; } // Check to see if we can simplify the terminator of the block before this @@ -1004,8 +987,8 @@ void BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { // Reverse the branch so we will fall through on the previous true cond. 
SmallVector<MachineOperand, 4> NewPriorCond(PriorCond); if (!TII->ReverseBranchCondition(NewPriorCond)) { - DOUT << "\nMoving MBB: " << *MBB; - DOUT << "To make fallthrough to: " << *PriorTBB << "\n"; + DEBUG(errs() << "\nMoving MBB: " << *MBB + << "To make fallthrough to: " << *PriorTBB << "\n"); TII->RemoveBranch(PrevBB); TII->InsertBranch(PrevBB, MBB, 0, NewPriorCond); @@ -1014,7 +997,7 @@ void BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { MBB->moveAfter(--MBB->getParent()->end()); MadeChange = true; ++NumBranchOpts; - return; + return MadeChange; } } } @@ -1116,7 +1099,7 @@ void BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { if (DidChange) { ++NumBranchOpts; MadeChange = true; - if (!HasBranchToSelf) return; + if (!HasBranchToSelf) return MadeChange; } } } @@ -1197,8 +1180,10 @@ void BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { PrevBB.isSuccessor(FallThrough)) { MBB->moveAfter(--MBB->getParent()->end()); MadeChange = true; - return; + return MadeChange; } } } + + return MadeChange; } diff --git a/lib/CodeGen/BranchFolding.h b/lib/CodeGen/BranchFolding.h new file mode 100644 index 0000000..9763e33 --- /dev/null +++ b/lib/CodeGen/BranchFolding.h @@ -0,0 +1,84 @@ +//===-- BranchFolding.h - Fold machine code branch instructions --*- C++ -*===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_BRANCHFOLDING_HPP +#define LLVM_CODEGEN_BRANCHFOLDING_HPP + +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include <vector> + +namespace llvm { + class MachineFunction; + class MachineModuleInfo; + class RegScavenger; + class TargetInstrInfo; + class TargetRegisterInfo; + + class BranchFolder { + public: + explicit BranchFolder(bool defaultEnableTailMerge); + + bool OptimizeFunction(MachineFunction &MF, + const TargetInstrInfo *tii, + const TargetRegisterInfo *tri, + MachineModuleInfo *mmi); + private: + typedef std::pair<unsigned,MachineBasicBlock*> MergePotentialsElt; + typedef std::vector<MergePotentialsElt>::iterator MPIterator; + std::vector<MergePotentialsElt> MergePotentials; + + typedef std::pair<MPIterator, MachineBasicBlock::iterator> SameTailElt; + std::vector<SameTailElt> SameTails; + + bool EnableTailMerge; + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + MachineModuleInfo *MMI; + RegScavenger *RS; + + bool TailMergeBlocks(MachineFunction &MF); + bool TryMergeBlocks(MachineBasicBlock* SuccBB, + MachineBasicBlock* PredBB); + void ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst, + MachineBasicBlock *NewDest); + MachineBasicBlock *SplitMBBAt(MachineBasicBlock &CurMBB, + MachineBasicBlock::iterator BBI1); + unsigned ComputeSameTails(unsigned CurHash, unsigned minCommonTailLength); + void RemoveBlocksWithHash(unsigned CurHash, MachineBasicBlock* SuccBB, + MachineBasicBlock* PredBB); + unsigned CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB, + unsigned maxCommonTailLength); + + bool OptimizeBranches(MachineFunction &MF); + bool OptimizeBlock(MachineBasicBlock *MBB); + void RemoveDeadBlock(MachineBasicBlock *MBB); + bool OptimizeImpDefsBlock(MachineBasicBlock *MBB); + + bool CanFallThrough(MachineBasicBlock *CurBB); + bool CanFallThrough(MachineBasicBlock *CurBB, bool 
BranchUnAnalyzable, + MachineBasicBlock *TBB, MachineBasicBlock *FBB, + const SmallVectorImpl<MachineOperand> &Cond); + }; + + + /// BranchFolderPass - Wrap branch folder in a machine function pass. + class BranchFolderPass : public MachineFunctionPass, + public BranchFolder { + public: + static char ID; + explicit BranchFolderPass(bool defaultEnableTailMerge) + : MachineFunctionPass(&ID), BranchFolder(defaultEnableTailMerge) {} + + virtual bool runOnMachineFunction(MachineFunction &MF); + virtual const char *getPassName() const { return "Control Flow Optimizer"; } + }; +} + +#endif /* LLVM_CODEGEN_BRANCHFOLDING_HPP */ diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index 48f17d0..713c30c 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -5,6 +5,7 @@ add_llvm_library(LLVMCodeGen DwarfEHPrepare.cpp ELFCodeEmitter.cpp ELFWriter.cpp + ExactHazardRecognizer.cpp GCMetadata.cpp GCMetadataPrinter.cpp GCStrategy.cpp @@ -12,7 +13,6 @@ add_llvm_library(LLVMCodeGen IntrinsicLowering.cpp LLVMTargetMachine.cpp LatencyPriorityQueue.cpp - LazyLiveness.cpp LiveInterval.cpp LiveIntervalAnalysis.cpp LiveStackAnalysis.cpp @@ -23,27 +23,28 @@ add_llvm_library(LLVMCodeGen MachineBasicBlock.cpp MachineDominators.cpp MachineFunction.cpp + MachineFunctionAnalysis.cpp + MachineFunctionPass.cpp MachineInstr.cpp MachineLICM.cpp MachineLoopInfo.cpp MachineModuleInfo.cpp + MachineModuleInfoImpls.cpp MachinePassRegistry.cpp MachineRegisterInfo.cpp MachineSink.cpp MachineVerifier.cpp + ObjectCodeEmitter.cpp OcamlGC.cpp - PBQP.cpp PHIElimination.cpp Passes.cpp PostRASchedulerList.cpp PreAllocSplitting.cpp PrologEpilogInserter.cpp PseudoSourceValue.cpp - RegAllocBigBlock.cpp RegAllocLinearScan.cpp RegAllocLocal.cpp RegAllocPBQP.cpp - RegAllocSimple.cpp RegisterCoalescer.cpp RegisterScavenging.cpp ScheduleDAG.cpp @@ -53,6 +54,7 @@ add_llvm_library(LLVMCodeGen ShadowStackGC.cpp ShrinkWrapping.cpp SimpleRegisterCoalescing.cpp + SjLjEHPrepare.cpp 
Spiller.cpp StackProtector.cpp StackSlotColoring.cpp diff --git a/lib/CodeGen/CodePlacementOpt.cpp b/lib/CodeGen/CodePlacementOpt.cpp index 383098e..932fae4 100644 --- a/lib/CodeGen/CodePlacementOpt.cpp +++ b/lib/CodeGen/CodePlacementOpt.cpp @@ -95,11 +95,11 @@ FunctionPass *llvm::createCodePlacementOptPass() { /// ... /// jmp B /// -/// C: --> new loop header +/// C: /// ... /// <fallthough to B> /// -/// B: +/// B: --> loop header /// ... /// jcc <cond> C, [exit] /// diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp b/lib/CodeGen/DeadMachineInstructionElim.cpp index 4832a5e..078ed3d 100644 --- a/lib/CodeGen/DeadMachineInstructionElim.cpp +++ b/lib/CodeGen/DeadMachineInstructionElim.cpp @@ -17,6 +17,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" using namespace llvm; @@ -36,7 +37,7 @@ namespace { DeadMachineInstructionElim() : MachineFunctionPass(&ID) {} private: - bool isDead(MachineInstr *MI) const; + bool isDead(const MachineInstr *MI) const; }; } char DeadMachineInstructionElim::ID = 0; @@ -49,10 +50,10 @@ FunctionPass *llvm::createDeadMachineInstructionElimPass() { return new DeadMachineInstructionElim(); } -bool DeadMachineInstructionElim::isDead(MachineInstr *MI) const { +bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const { // Don't delete instructions with side effects. bool SawStore = false; - if (!MI->isSafeToMove(TII, SawStore)) + if (!MI->isSafeToMove(TII, SawStore, 0)) return false; // Examine each operand. @@ -110,7 +111,7 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) { // If the instruction is dead, delete it! 
if (isDead(MI)) { - DOUT << "DeadMachineInstructionElim: DELETING: " << *MI; + DEBUG(errs() << "DeadMachineInstructionElim: DELETING: " << *MI); AnyChanges = true; MI->eraseFromParent(); MIE = MBB->rend(); diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp index 720e3d1..72b3f92 100644 --- a/lib/CodeGen/DwarfEHPrepare.cpp +++ b/lib/CodeGen/DwarfEHPrepare.cpp @@ -107,7 +107,9 @@ FunctionPass *llvm::createDwarfEHPass(const TargetLowering *tli, bool fast) { /// NormalizeLandingPads - Normalize and discover landing pads, noting them /// in the LandingPads set. A landing pad is normal if the only CFG edges -/// that end at it are unwind edges from invoke instructions. +/// that end at it are unwind edges from invoke instructions. If we inlined +/// through an invoke we could have a normal branch from the previous +/// unwind block through to the landing pad for the original invoke. /// Abnormal landing pads are fixed up by redirecting all unwind edges to /// a new basic block which falls through to the original. bool DwarfEHPrepare::NormalizeLandingPads() { @@ -132,6 +134,7 @@ bool DwarfEHPrepare::NormalizeLandingPads() { break; } } + if (OnlyUnwoundTo) { // Only unwind edges lead to the landing pad. Remember the landing pad. LandingPads.insert(LPad); @@ -142,7 +145,8 @@ bool DwarfEHPrepare::NormalizeLandingPads() { // edges to a new basic block which falls through into this one. // Create the new basic block. - BasicBlock *NewBB = BasicBlock::Create(LPad->getName() + "_unwind_edge"); + BasicBlock *NewBB = BasicBlock::Create(F->getContext(), + LPad->getName() + "_unwind_edge"); // Insert it into the function right before the original landing pad. LPad->getParent()->getBasicBlockList().insert(LPad, NewBB); @@ -218,28 +222,43 @@ bool DwarfEHPrepare::NormalizeLandingPads() { /// at runtime if there is no such exception: using unwind to throw a new /// exception is currently not supported. 
bool DwarfEHPrepare::LowerUnwinds() { - bool Changed = false; + SmallVector<TerminatorInst*, 16> UnwindInsts; for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) { TerminatorInst *TI = I->getTerminator(); - if (!isa<UnwindInst>(TI)) - continue; + if (isa<UnwindInst>(TI)) + UnwindInsts.push_back(TI); + } + + if (UnwindInsts.empty()) return false; + + // Find the rewind function if we didn't already. + if (!RewindFunction) { + LLVMContext &Ctx = UnwindInsts[0]->getContext(); + std::vector<const Type*> + Params(1, Type::getInt8PtrTy(Ctx)); + FunctionType *FTy = FunctionType::get(Type::getVoidTy(Ctx), + Params, false); + const char *RewindName = TLI->getLibcallName(RTLIB::UNWIND_RESUME); + RewindFunction = F->getParent()->getOrInsertFunction(RewindName, FTy); + } + + bool Changed = false; + + for (SmallVectorImpl<TerminatorInst*>::iterator + I = UnwindInsts.begin(), E = UnwindInsts.end(); I != E; ++I) { + TerminatorInst *TI = *I; // Replace the unwind instruction with a call to _Unwind_Resume (or the // appropriate target equivalent) followed by an UnreachableInst. - // Find the rewind function if we didn't already. - if (!RewindFunction) { - std::vector<const Type*> Params(1, PointerType::getUnqual(Type::Int8Ty)); - FunctionType *FTy = FunctionType::get(Type::VoidTy, Params, false); - const char *RewindName = TLI->getLibcallName(RTLIB::UNWIND_RESUME); - RewindFunction = F->getParent()->getOrInsertFunction(RewindName, FTy); - } - // Create the call... - CallInst::Create(RewindFunction, CreateReadOfExceptionValue(I), "", TI); + CallInst *CI = CallInst::Create(RewindFunction, + CreateReadOfExceptionValue(TI->getParent()), + "", TI); + CI->setCallingConv(TLI->getLibcallCallingConv(RTLIB::UNWIND_RESUME)); // ...followed by an UnreachableInst. - new UnreachableInst(TI); + new UnreachableInst(TI->getContext(), TI); // Nuke the unwind instruction. 
TI->eraseFromParent(); @@ -314,7 +333,7 @@ bool DwarfEHPrepare::PromoteStackTemporaries() { if (ExceptionValueVar && DT && DF && isAllocaPromotable(ExceptionValueVar)) { // Turn the exception temporary into registers and phi nodes if possible. std::vector<AllocaInst*> Allocas(1, ExceptionValueVar); - PromoteMemToReg(Allocas, *DT, *DF); + PromoteMemToReg(Allocas, *DT, *DF, ExceptionValueVar->getContext()); return true; } return false; @@ -354,8 +373,8 @@ Instruction *DwarfEHPrepare::CreateValueLoad(BasicBlock *BB) { // Create the temporary if we didn't already. if (!ExceptionValueVar) { - ExceptionValueVar = new AllocaInst(PointerType::getUnqual(Type::Int8Ty), - "eh.value", F->begin()->begin()); + ExceptionValueVar = new AllocaInst(PointerType::getUnqual( + Type::getInt8Ty(BB->getContext())), "eh.value", F->begin()->begin()); ++NumStackTempsIntroduced; } diff --git a/lib/CodeGen/ELF.h b/lib/CodeGen/ELF.h index 7e983a4..b466e89 100644 --- a/lib/CodeGen/ELF.h +++ b/lib/CodeGen/ELF.h @@ -52,6 +52,159 @@ namespace llvm { EV_CURRENT = 1 }; + /// ELFSym - This struct contains information about each symbol that is + /// added to logical symbol table for the module. This is eventually + /// turned into a real symbol table in the file. + struct ELFSym { + + // ELF symbols are related to llvm ones by being one of the two llvm + // types, for the other ones (section, file, func) a null pointer is + // assumed by default. + union { + const GlobalValue *GV; // If this is a pointer to a GV + const char *Ext; // If this is a pointer to a named symbol + } Source; + + // Describes from which source type this ELF symbol comes from, + // they can be GlobalValue, ExternalSymbol or neither. + enum { + isGV, // The Source.GV field is valid. + isExtSym, // The Source.ExtSym field is valid. 
+ isOther // Not a GlobalValue or External Symbol + }; + unsigned SourceType; + + bool isGlobalValue() const { return SourceType == isGV; } + bool isExternalSym() const { return SourceType == isExtSym; } + + // getGlobalValue - If this is a global value which originated the + // elf symbol, return a reference to it. + const GlobalValue *getGlobalValue() const { + assert(SourceType == isGV && "This is not a global value"); + return Source.GV; + }; + + // getExternalSym - If this is an external symbol which originated the + // elf symbol, return a reference to it. + const char *getExternalSymbol() const { + assert(SourceType == isExtSym && "This is not an external symbol"); + return Source.Ext; + }; + + // getGV - From a global value return a elf symbol to represent it + static ELFSym *getGV(const GlobalValue *GV, unsigned Bind, + unsigned Type, unsigned Visibility) { + ELFSym *Sym = new ELFSym(); + Sym->Source.GV = GV; + Sym->setBind(Bind); + Sym->setType(Type); + Sym->setVisibility(Visibility); + Sym->SourceType = isGV; + return Sym; + } + + // getExtSym - Create and return an elf symbol to represent an + // external symbol + static ELFSym *getExtSym(const char *Ext) { + ELFSym *Sym = new ELFSym(); + Sym->Source.Ext = Ext; + Sym->setBind(STB_GLOBAL); + Sym->setType(STT_NOTYPE); + Sym->setVisibility(STV_DEFAULT); + Sym->SourceType = isExtSym; + return Sym; + } + + // getSectionSym - Returns a elf symbol to represent an elf section + static ELFSym *getSectionSym() { + ELFSym *Sym = new ELFSym(); + Sym->setBind(STB_LOCAL); + Sym->setType(STT_SECTION); + Sym->setVisibility(STV_DEFAULT); + Sym->SourceType = isOther; + return Sym; + } + + // getFileSym - Returns a elf symbol to represent the module identifier + static ELFSym *getFileSym() { + ELFSym *Sym = new ELFSym(); + Sym->setBind(STB_LOCAL); + Sym->setType(STT_FILE); + Sym->setVisibility(STV_DEFAULT); + Sym->SectionIdx = 0xfff1; // ELFSection::SHN_ABS; + Sym->SourceType = isOther; + return Sym; + } + + // getUndefGV 
- Returns a STT_NOTYPE symbol + static ELFSym *getUndefGV(const GlobalValue *GV, unsigned Bind) { + ELFSym *Sym = new ELFSym(); + Sym->Source.GV = GV; + Sym->setBind(Bind); + Sym->setType(STT_NOTYPE); + Sym->setVisibility(STV_DEFAULT); + Sym->SectionIdx = 0; //ELFSection::SHN_UNDEF; + Sym->SourceType = isGV; + return Sym; + } + + // ELF specific fields + unsigned NameIdx; // Index in .strtab of name, once emitted. + uint64_t Value; + unsigned Size; + uint8_t Info; + uint8_t Other; + unsigned short SectionIdx; + + // Symbol index into the Symbol table + unsigned SymTabIdx; + + enum { + STB_LOCAL = 0, // Local sym, not visible outside obj file containing def + STB_GLOBAL = 1, // Global sym, visible to all object files being combined + STB_WEAK = 2 // Weak symbol, like global but lower-precedence + }; + + enum { + STT_NOTYPE = 0, // Symbol's type is not specified + STT_OBJECT = 1, // Symbol is a data object (variable, array, etc.) + STT_FUNC = 2, // Symbol is executable code (function, etc.) 
+ STT_SECTION = 3, // Symbol refers to a section + STT_FILE = 4 // Local, absolute symbol that refers to a file + }; + + enum { + STV_DEFAULT = 0, // Visibility is specified by binding type + STV_INTERNAL = 1, // Defined by processor supplements + STV_HIDDEN = 2, // Not visible to other components + STV_PROTECTED = 3 // Visible in other components but not preemptable + }; + + ELFSym() : SourceType(isOther), NameIdx(0), Value(0), + Size(0), Info(0), Other(STV_DEFAULT), SectionIdx(0), + SymTabIdx(0) {} + + unsigned getBind() const { return (Info >> 4) & 0xf; } + unsigned getType() const { return Info & 0xf; } + bool isLocalBind() const { return getBind() == STB_LOCAL; } + bool isFileType() const { return getType() == STT_FILE; } + + void setBind(unsigned X) { + assert(X == (X & 0xF) && "Bind value out of range!"); + Info = (Info & 0x0F) | (X << 4); + } + + void setType(unsigned X) { + assert(X == (X & 0xF) && "Type value out of range!"); + Info = (Info & 0xF0) | X; + } + + void setVisibility(unsigned V) { + assert(V == (V & 0x3) && "Visibility value out of range!"); + Other = V; + } + }; + /// ELFSection - This struct contains information about each section that is /// emitted to the file. This is eventually turned into the section header /// table at the end of the file. @@ -117,78 +270,19 @@ namespace llvm { /// SectionIdx - The number of the section in the Section Table. unsigned short SectionIdx; - ELFSection(const std::string &name, bool isLittleEndian, bool is64Bit) - : BinaryObject(name, isLittleEndian, is64Bit), Type(0), Flags(0), Addr(0), - Offset(0), Size(0), Link(0), Info(0), Align(0), EntSize(0) {} - }; - - /// ELFSym - This struct contains information about each symbol that is - /// added to logical symbol table for the module. This is eventually - /// turned into a real symbol table in the file. - struct ELFSym { - // The global value this corresponds to. 
Global symbols can be on of the - // 3 types : if this symbol has a zero initializer, it is common or should - // be placed in bss section otherwise it's a constant. - const GlobalValue *GV; - bool IsCommon; - bool IsBss; - bool IsConstant; - - // ELF specific fields - unsigned NameIdx; // Index in .strtab of name, once emitted. - uint64_t Value; - unsigned Size; - uint8_t Info; - uint8_t Other; - unsigned short SectionIdx; + /// Sym - The symbol to represent this section if it has one. + ELFSym *Sym; - // Symbol index into the Symbol table - unsigned SymTabIdx; - - enum { - STB_LOCAL = 0, - STB_GLOBAL = 1, - STB_WEAK = 2 - }; - - enum { - STT_NOTYPE = 0, - STT_OBJECT = 1, - STT_FUNC = 2, - STT_SECTION = 3, - STT_FILE = 4 - }; - - enum { - STV_DEFAULT = 0, // Visibility is specified by binding type - STV_INTERNAL = 1, // Defined by processor supplements - STV_HIDDEN = 2, // Not visible to other components - STV_PROTECTED = 3 // Visible in other components but not preemptable - }; - - ELFSym(const GlobalValue *gv) : GV(gv), IsCommon(false), IsBss(false), - IsConstant(false), NameIdx(0), Value(0), - Size(0), Info(0), Other(STV_DEFAULT), - SectionIdx(ELFSection::SHN_UNDEF), - SymTabIdx(0) {} - - unsigned getBind() { return (Info >> 4) & 0xf; } - unsigned getType() { return Info & 0xf; } - - void setBind(unsigned X) { - assert(X == (X & 0xF) && "Bind value out of range!"); - Info = (Info & 0x0F) | (X << 4); + /// getSymIndex - Returns the symbol table index of the symbol + /// representing this section. 
+ unsigned getSymbolTableIndex() const { + assert(Sym && "section not present in the symbol table"); + return Sym->SymTabIdx; } - void setType(unsigned X) { - assert(X == (X & 0xF) && "Type value out of range!"); - Info = (Info & 0xF0) | X; - } - - void setVisibility(unsigned V) { - assert(V == (V & 0x3) && "Type value out of range!"); - Other = V; - } + ELFSection(const std::string &name, bool isLittleEndian, bool is64Bit) + : BinaryObject(name, isLittleEndian, is64Bit), Type(0), Flags(0), Addr(0), + Offset(0), Size(0), Link(0), Info(0), Align(0), EntSize(0), Sym(0) {} }; /// ELFRelocation - This class contains all the information necessary to diff --git a/lib/CodeGen/ELFCodeEmitter.cpp b/lib/CodeGen/ELFCodeEmitter.cpp index 691f194..a6429f7 100644 --- a/lib/CodeGen/ELFCodeEmitter.cpp +++ b/lib/CodeGen/ELFCodeEmitter.cpp @@ -17,12 +17,16 @@ #include "llvm/Function.h" #include "llvm/CodeGen/BinaryObject.h" #include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineRelocation.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetELFWriterInfo.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" //===----------------------------------------------------------------------===// // ELFCodeEmitter Implementation @@ -33,84 +37,75 @@ namespace llvm { /// startFunction - This callback is invoked when a new machine function is /// about to be emitted. void ELFCodeEmitter::startFunction(MachineFunction &MF) { - // Get the ELF Section that this function belongs in. 
- ES = &EW.getTextSection(); + DEBUG(errs() << "processing function: " + << MF.getFunction()->getName() << "\n"); - DOUT << "processing function: " << MF.getFunction()->getName() << "\n"; + // Get the ELF Section that this function belongs in. + ES = &EW.getTextSection(MF.getFunction()); - // FIXME: better memory management, this will be replaced by BinaryObjects - BinaryData &BD = ES->getData(); - BD.reserve(4096); - BufferBegin = &BD[0]; - BufferEnd = BufferBegin + BD.capacity(); + // Set the desired binary object to be used by the code emitters + setBinaryObject(ES); // Get the function alignment in bytes unsigned Align = (1 << MF.getAlignment()); - // Align the section size with the function alignment, so the function can - // start in a aligned offset, also update the section alignment if needed. - if (ES->Align < Align) ES->Align = Align; - ES->Size = (ES->Size + (Align-1)) & (-Align); - - // Snaity check on allocated space for text section - assert( ES->Size < 4096 && "no more space in TextSection" ); - - // FIXME: Using ES->Size directly here instead of calculating it from the - // output buffer size (impossible because the code emitter deals only in raw - // bytes) forces us to manually synchronize size and write padding zero bytes - // to the output buffer for all non-text sections. For text sections, we do - // not synchonize the output buffer, and we just blow up if anyone tries to - // write non-code to it. An assert should probably be added to - // AddSymbolToSection to prevent calling it on the text section. - CurBufferPtr = BufferBegin + ES->Size; - - // Record function start address relative to BufferBegin - FnStartPtr = CurBufferPtr; + // The function must start on its required alignment + ES->emitAlignment(Align); + + // Update the section alignment if needed. 
+ ES->Align = std::max(ES->Align, Align); + + // Record the function start offset + FnStartOff = ES->getCurrentPCOffset(); + + // Emit constant pool and jump tables to their appropriate sections. + // They need to be emitted before the function because in some targets + // the later may reference JT or CP entry address. + emitConstantPool(MF.getConstantPool()); + emitJumpTables(MF.getJumpTableInfo()); } /// finishFunction - This callback is invoked after the function is completely /// finished. bool ELFCodeEmitter::finishFunction(MachineFunction &MF) { - // Update Section Size - ES->Size = CurBufferPtr - BufferBegin; - // Add a symbol to represent the function. const Function *F = MF.getFunction(); - ELFSym FnSym(F); - FnSym.setType(ELFSym::STT_FUNC); - FnSym.setBind(EW.getGlobalELFLinkage(F)); - FnSym.setVisibility(EW.getGlobalELFVisibility(F)); - FnSym.SectionIdx = ES->SectionIdx; - FnSym.Size = CurBufferPtr-FnStartPtr; + ELFSym *FnSym = ELFSym::getGV(F, EW.getGlobalELFBinding(F), ELFSym::STT_FUNC, + EW.getGlobalELFVisibility(F)); + FnSym->SectionIdx = ES->SectionIdx; + FnSym->Size = ES->getCurrentPCOffset()-FnStartOff; + EW.AddPendingGlobalSymbol(F, true); // Offset from start of Section - FnSym.Value = FnStartPtr-BufferBegin; - - // Locals should go on the symbol list front - if (!F->hasPrivateLinkage()) { - if (FnSym.getBind() == ELFSym::STB_LOCAL) - EW.SymbolList.push_front(FnSym); - else - EW.SymbolList.push_back(FnSym); + FnSym->Value = FnStartOff; + + if (!F->hasPrivateLinkage()) + EW.SymbolList.push_back(FnSym); + + // Patch up Jump Table Section relocations to use the real MBBs offsets + // now that the MBB label offsets inside the function are known. 
+ if (!MF.getJumpTableInfo()->isEmpty()) { + ELFSection &JTSection = EW.getJumpTableSection(); + for (std::vector<MachineRelocation>::iterator MRI = JTRelocations.begin(), + MRE = JTRelocations.end(); MRI != MRE; ++MRI) { + MachineRelocation &MR = *MRI; + unsigned MBBOffset = getMachineBasicBlockAddress(MR.getBasicBlock()); + MR.setResultPointer((void*)MBBOffset); + MR.setConstantVal(ES->SectionIdx); + JTSection.addRelocation(MR); + } } - // Emit constant pool to appropriate section(s) - emitConstantPool(MF.getConstantPool()); - - // Emit jump tables to appropriate section - emitJumpTables(MF.getJumpTableInfo()); - - // Relocations - // ----------- // If we have emitted any relocations to function-specific objects such as // basic blocks, constant pools entries, or jump tables, record their - // addresses now so that we can rewrite them with the correct addresses - // later. + // addresses now so that we can rewrite them with the correct addresses later for (unsigned i = 0, e = Relocations.size(); i != e; ++i) { MachineRelocation &MR = Relocations[i]; intptr_t Addr; if (MR.isGlobalValue()) { - EW.PendingGlobals.insert(MR.getGlobalValue()); + EW.AddPendingGlobalSymbol(MR.getGlobalValue()); + } else if (MR.isExternalSymbol()) { + EW.AddPendingExternalSymbol(MR.getExternalSymbol()); } else if (MR.isBasicBlock()) { Addr = getMachineBasicBlockAddress(MR.getBasicBlock()); MR.setConstantVal(ES->SectionIdx); @@ -120,16 +115,18 @@ bool ELFCodeEmitter::finishFunction(MachineFunction &MF) { MR.setConstantVal(CPSections[MR.getConstantPoolIndex()]); MR.setResultPointer((void*)Addr); } else if (MR.isJumpTableIndex()) { + ELFSection &JTSection = EW.getJumpTableSection(); Addr = getJumpTableEntryAddress(MR.getJumpTableIndex()); + MR.setConstantVal(JTSection.SectionIdx); MR.setResultPointer((void*)Addr); - MR.setConstantVal(JumpTableSectionIdx); } else { - assert(0 && "Unhandled relocation type"); + llvm_unreachable("Unhandled relocation type"); } ES->addRelocation(MR); } // Clear 
per-function data structures. + JTRelocations.clear(); Relocations.clear(); CPLocations.clear(); CPSections.clear(); @@ -148,25 +145,19 @@ void ELFCodeEmitter::emitConstantPool(MachineConstantPool *MCP) { assert(TM.getRelocationModel() != Reloc::PIC_ && "PIC codegen not yet handled for elf constant pools!"); - const TargetAsmInfo *TAI = TM.getTargetAsmInfo(); for (unsigned i = 0, e = CP.size(); i != e; ++i) { MachineConstantPoolEntry CPE = CP[i]; - // Get the right ELF Section for this constant pool entry - std::string CstPoolName = - TAI->SelectSectionForMachineConst(CPE.getType())->getName(); - ELFSection &CstPoolSection = - EW.getConstantPoolSection(CstPoolName, CPE.getAlignment()); - // Record the constant pool location and the section index - CPLocations.push_back(CstPoolSection.size()); - CPSections.push_back(CstPoolSection.SectionIdx); + ELFSection &CstPool = EW.getConstantPoolSection(CPE); + CPLocations.push_back(CstPool.size()); + CPSections.push_back(CstPool.SectionIdx); if (CPE.isMachineConstantPoolEntry()) assert("CPE.isMachineConstantPoolEntry not supported yet"); // Emit the constant to constant pool section - EW.EmitGlobalConstant(CPE.Val.ConstVal, CstPoolSection); + EW.EmitGlobalConstant(CPE.Val.ConstVal, CstPool); } } @@ -180,44 +171,32 @@ void ELFCodeEmitter::emitJumpTables(MachineJumpTableInfo *MJTI) { assert(TM.getRelocationModel() != Reloc::PIC_ && "PIC codegen not yet handled for elf jump tables!"); - const TargetAsmInfo *TAI = TM.getTargetAsmInfo(); + const TargetELFWriterInfo *TEW = TM.getELFWriterInfo(); + unsigned EntrySize = MJTI->getEntrySize(); // Get the ELF Section to emit the jump table - unsigned Align = TM.getTargetData()->getPointerABIAlignment(); - std::string JTName(TAI->getJumpTableDataSection()); - ELFSection &JTSection = EW.getJumpTableSection(JTName, Align); - JumpTableSectionIdx = JTSection.SectionIdx; - - // Entries in the JT Section are relocated against the text section - ELFSection &TextSection = EW.getTextSection(); + 
ELFSection &JTSection = EW.getJumpTableSection(); // For each JT, record its offset from the start of the section for (unsigned i = 0, e = JT.size(); i != e; ++i) { const std::vector<MachineBasicBlock*> &MBBs = JT[i].MBBs; - DOUT << "JTSection.size(): " << JTSection.size() << "\n"; - DOUT << "JTLocations.size: " << JTLocations.size() << "\n"; - // Record JT 'i' offset in the JT section JTLocations.push_back(JTSection.size()); // Each MBB entry in the Jump table section has a relocation entry // against the current text section. for (unsigned mi = 0, me = MBBs.size(); mi != me; ++mi) { + unsigned MachineRelTy = TEW->getAbsoluteLabelMachineRelTy(); MachineRelocation MR = - MachineRelocation::getBB(JTSection.size(), - MachineRelocation::VANILLA, - MBBs[mi]); - - // Offset of JT 'i' in JT section - MR.setResultPointer((void*)getMachineBasicBlockAddress(MBBs[mi])); - MR.setConstantVal(TextSection.SectionIdx); + MachineRelocation::getBB(JTSection.size(), MachineRelTy, MBBs[mi]); // Add the relocation to the Jump Table section - JTSection.addRelocation(MR); + JTRelocations.push_back(MR); // Output placeholder for MBB in the JT section - JTSection.emitWord(0); + for (unsigned s=0; s < EntrySize; ++s) + JTSection.emitByte(0); } } } diff --git a/lib/CodeGen/ELFCodeEmitter.h b/lib/CodeGen/ELFCodeEmitter.h index 982aebf..b5e9c84 100644 --- a/lib/CodeGen/ELFCodeEmitter.h +++ b/lib/CodeGen/ELFCodeEmitter.h @@ -10,7 +10,7 @@ #ifndef ELFCODEEMITTER_H #define ELFCODEEMITTER_H -#include "llvm/CodeGen/MachineCodeEmitter.h" +#include "llvm/CodeGen/ObjectCodeEmitter.h" #include <vector> namespace llvm { @@ -19,7 +19,7 @@ namespace llvm { /// ELFCodeEmitter - This class is used by the ELFWriter to /// emit the code for functions to the ELF file. 
- class ELFCodeEmitter : public MachineCodeEmitter { + class ELFCodeEmitter : public ObjectCodeEmitter { ELFWriter &EW; /// Target machine description @@ -28,102 +28,48 @@ namespace llvm { /// Section containing code for functions ELFSection *ES; - /// Relocations - These are the relocations that the function needs, as - /// emitted. + /// Relocations - Record relocations needed by the current function std::vector<MachineRelocation> Relocations; - /// CPLocations - This is a map of constant pool indices to offsets from the - /// start of the section for that constant pool index. - std::vector<uintptr_t> CPLocations; + /// JTRelocations - Record relocations needed by the relocation + /// section. + std::vector<MachineRelocation> JTRelocations; - /// CPSections - This is a map of constant pool indices to the MachOSection - /// containing the constant pool entry for that index. - std::vector<unsigned> CPSections; - - /// JTLocations - This is a map of jump table indices to offsets from the - /// start of the section for that jump table index. - std::vector<uintptr_t> JTLocations; - - /// MBBLocations - This vector is a mapping from MBB ID's to their address. - /// It is filled in by the StartMachineBasicBlock callback and queried by - /// the getMachineBasicBlockAddress callback. 
- std::vector<uintptr_t> MBBLocations; - - /// FnStartPtr - Pointer to the start location of the current function - /// in the buffer - uint8_t *FnStartPtr; - - /// JumpTableSectionIdx - Holds the index of the Jump Table Section - unsigned JumpTableSectionIdx; + /// FnStartPtr - Function offset from the beginning of ELFSection 'ES' + uintptr_t FnStartOff; public: - explicit ELFCodeEmitter(ELFWriter &ew) : EW(ew), TM(EW.TM), - JumpTableSectionIdx(0) {} - - void startFunction(MachineFunction &F); - bool finishFunction(MachineFunction &F); + explicit ELFCodeEmitter(ELFWriter &ew) : EW(ew), TM(EW.TM) {} + /// addRelocation - Register new relocations for this function void addRelocation(const MachineRelocation &MR) { Relocations.push_back(MR); } - virtual void StartMachineBasicBlock(MachineBasicBlock *MBB) { - if (MBBLocations.size() <= (unsigned)MBB->getNumber()) - MBBLocations.resize((MBB->getNumber()+1)*2); - MBBLocations[MBB->getNumber()] = getCurrentPCOffset(); - } + /// emitConstantPool - For each constant pool entry, figure out which + /// section the constant should live in and emit data to it + void emitConstantPool(MachineConstantPool *MCP); - virtual uintptr_t getConstantPoolEntryAddress(unsigned Index) const { - assert(CPLocations.size() > Index && "CP not emitted!"); - return CPLocations[Index]; - } + /// emitJumpTables - Emit all the jump tables for a given jump table + /// info and record them to the appropriate section. 
+ void emitJumpTables(MachineJumpTableInfo *MJTI); - virtual uintptr_t getJumpTableEntryAddress(unsigned Index) const { - assert(JTLocations.size() > Index && "JT not emitted!"); - return JTLocations[Index]; - } + void startFunction(MachineFunction &F); + bool finishFunction(MachineFunction &F); - virtual uintptr_t getMachineBasicBlockAddress(MachineBasicBlock *MBB) const { - assert(MBBLocations.size() > (unsigned)MBB->getNumber() && - MBBLocations[MBB->getNumber()] && "MBB not emitted!"); - return MBBLocations[MBB->getNumber()]; + /// emitLabel - Emits a label + virtual void emitLabel(uint64_t LabelID) { + assert("emitLabel not implemented"); } + /// getLabelAddress - Return the address of the specified LabelID, + /// only usable after the LabelID has been emitted. virtual uintptr_t getLabelAddress(uint64_t Label) const { - assert(0 && "Label address not implementated yet!"); - abort(); + assert("getLabelAddress not implemented"); return 0; } - virtual void emitLabel(uint64_t LabelID) { - assert(0 && "emit Label not implementated yet!"); - abort(); - } - - /// emitConstantPool - For each constant pool entry, figure out which section - /// the constant should live in and emit the constant. - void emitConstantPool(MachineConstantPool *MCP); - - /// emitJumpTables - Emit all the jump tables for a given jump table info - /// record to the appropriate section. - void emitJumpTables(MachineJumpTableInfo *MJTI); - virtual void setModuleInfo(llvm::MachineModuleInfo* MMI) {} - /// JIT SPECIFIC FUNCTIONS - DO NOT IMPLEMENT THESE HERE! 
- void startGVStub(const GlobalValue* F, unsigned StubSize, - unsigned Alignment = 1) { - assert(0 && "JIT specific function called!"); - abort(); - } - void startGVStub(const GlobalValue* F, void *Buffer, unsigned StubSize) { - assert(0 && "JIT specific function called!"); - abort(); - } - void *finishGVStub(const GlobalValue *F) { - assert(0 && "JIT specific function called!"); - abort(); - return 0; - } }; // end class ELFCodeEmitter } // end namespace llvm diff --git a/lib/CodeGen/ELFWriter.cpp b/lib/CodeGen/ELFWriter.cpp index 9e91524..3e1ee11 100644 --- a/lib/CodeGen/ELFWriter.cpp +++ b/lib/CodeGen/ELFWriter.cpp @@ -29,7 +29,6 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "elfwriter" - #include "ELF.h" #include "ELFWriter.h" #include "ELFCodeEmitter.h" @@ -40,26 +39,33 @@ #include "llvm/CodeGen/BinaryObject.h" #include "llvm/CodeGen/FileWriters.h" #include "llvm/CodeGen/MachineCodeEmitter.h" +#include "llvm/CodeGen/ObjectCodeEmitter.h" +#include "llvm/CodeGen/MachineCodeEmitter.h" #include "llvm/CodeGen/MachineConstantPool.h" -#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetELFWriterInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Mangler.h" -#include "llvm/Support/Streams.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Support/Debug.h" + using namespace llvm; char ELFWriter::ID = 0; -/// AddELFWriter - Concrete function to add the ELF writer to the function pass -/// manager. 
-MachineCodeEmitter *llvm::AddELFWriter(PassManagerBase &PM, - raw_ostream &O, - TargetMachine &TM) { + +/// AddELFWriter - Add the ELF writer to the function pass manager +ObjectCodeEmitter *llvm::AddELFWriter(PassManagerBase &PM, + raw_ostream &O, + TargetMachine &TM) { ELFWriter *EW = new ELFWriter(O, TM); PM.add(EW); - return &EW->getMachineCodeEmitter(); + return EW->getObjectCodeEmitter(); } //===----------------------------------------------------------------------===// @@ -68,27 +74,51 @@ MachineCodeEmitter *llvm::AddELFWriter(PassManagerBase &PM, ELFWriter::ELFWriter(raw_ostream &o, TargetMachine &tm) : MachineFunctionPass(&ID), O(o), TM(tm), + OutContext(*new MCContext()), + TLOF(TM.getTargetLowering()->getObjFileLowering()), is64Bit(TM.getTargetData()->getPointerSizeInBits() == 64), isLittleEndian(TM.getTargetData()->isLittleEndian()), ElfHdr(isLittleEndian, is64Bit) { - TAI = TM.getTargetAsmInfo(); + MAI = TM.getMCAsmInfo(); TEW = TM.getELFWriterInfo(); - // Create the machine code emitter object for this target. - MCE = new ELFCodeEmitter(*this); + // Create the object code emitter object for this target. + ElfCE = new ELFCodeEmitter(*this); // Inital number of sections NumSections = 0; } ELFWriter::~ELFWriter() { - delete MCE; + delete ElfCE; + delete &OutContext; + + while(!SymbolList.empty()) { + delete SymbolList.back(); + SymbolList.pop_back(); + } + + while(!PrivateSyms.empty()) { + delete PrivateSyms.back(); + PrivateSyms.pop_back(); + } + + while(!SectionList.empty()) { + delete SectionList.back(); + SectionList.pop_back(); + } + + // Release the name mangler object. + delete Mang; Mang = 0; } // doInitialization - Emit the file header and all of the global variables for // the module to the ELF file. bool ELFWriter::doInitialization(Module &M) { + // Initialize TargetLoweringObjectFile. 
+ const_cast<TargetLoweringObjectFile&>(TLOF).Initialize(OutContext, TM); + Mang = new Mangler(M); // ELF Header @@ -138,13 +168,115 @@ bool ELFWriter::doInitialization(Module &M) { // Add the null section, which is required to be first in the file. getNullSection(); + // The first entry in the symtab is the null symbol and the second + // is a local symbol containing the module/file name + SymbolList.push_back(new ELFSym()); + SymbolList.push_back(ELFSym::getFileSym()); + return false; } +// AddPendingGlobalSymbol - Add a global to be processed and to +// the global symbol lookup, use a zero index because the table +// index will be determined later. +void ELFWriter::AddPendingGlobalSymbol(const GlobalValue *GV, + bool AddToLookup /* = false */) { + PendingGlobals.insert(GV); + if (AddToLookup) + GblSymLookup[GV] = 0; +} + +// AddPendingExternalSymbol - Add the external to be processed +// and to the external symbol lookup, use a zero index because +// the symbol table index will be determined later. 
+void ELFWriter::AddPendingExternalSymbol(const char *External) { + PendingExternals.insert(External); + ExtSymLookup[External] = 0; +} + +ELFSection &ELFWriter::getDataSection() { + const MCSectionELF *Data = (const MCSectionELF *)TLOF.getDataSection(); + return getSection(Data->getSectionName(), Data->getType(), + Data->getFlags(), 4); +} + +ELFSection &ELFWriter::getBSSSection() { + const MCSectionELF *BSS = (const MCSectionELF *)TLOF.getBSSSection(); + return getSection(BSS->getSectionName(), BSS->getType(), BSS->getFlags(), 4); +} + +// getCtorSection - Get the static constructor section +ELFSection &ELFWriter::getCtorSection() { + const MCSectionELF *Ctor = (const MCSectionELF *)TLOF.getStaticCtorSection(); + return getSection(Ctor->getSectionName(), Ctor->getType(), Ctor->getFlags()); +} + +// getDtorSection - Get the static destructor section +ELFSection &ELFWriter::getDtorSection() { + const MCSectionELF *Dtor = (const MCSectionELF *)TLOF.getStaticDtorSection(); + return getSection(Dtor->getSectionName(), Dtor->getType(), Dtor->getFlags()); +} + +// getTextSection - Get the text section for the specified function +ELFSection &ELFWriter::getTextSection(Function *F) { + const MCSectionELF *Text = + (const MCSectionELF *)TLOF.SectionForGlobal(F, Mang, TM); + return getSection(Text->getSectionName(), Text->getType(), Text->getFlags()); +} + +// getJumpTableSection - Get a read only section for constants when +// emitting jump tables. TODO: add PIC support +ELFSection &ELFWriter::getJumpTableSection() { + const MCSectionELF *JT = + (const MCSectionELF *)TLOF.getSectionForConstant(SectionKind::getReadOnly()); + return getSection(JT->getSectionName(), JT->getType(), JT->getFlags(), + TM.getTargetData()->getPointerABIAlignment()); +} + +// getConstantPoolSection - Get a constant pool section based on the machine +// constant pool entry type and relocation info. 
+ELFSection &ELFWriter::getConstantPoolSection(MachineConstantPoolEntry &CPE) { + SectionKind Kind; + switch (CPE.getRelocationInfo()) { + default: llvm_unreachable("Unknown section kind"); + case 2: Kind = SectionKind::getReadOnlyWithRel(); break; + case 1: + Kind = SectionKind::getReadOnlyWithRelLocal(); + break; + case 0: + switch (TM.getTargetData()->getTypeAllocSize(CPE.getType())) { + case 4: Kind = SectionKind::getMergeableConst4(); break; + case 8: Kind = SectionKind::getMergeableConst8(); break; + case 16: Kind = SectionKind::getMergeableConst16(); break; + default: Kind = SectionKind::getMergeableConst(); break; + } + } + + const MCSectionELF *CPSect = + (const MCSectionELF *)TLOF.getSectionForConstant(Kind); + return getSection(CPSect->getSectionName(), CPSect->getType(), + CPSect->getFlags(), CPE.getAlignment()); +} + +// getRelocSection - Return the relocation section of section 'S'. 'RelA' +// is true if the relocation section contains entries with addends. +ELFSection &ELFWriter::getRelocSection(ELFSection &S) { + unsigned SectionType = TEW->hasRelocationAddend() ? 
+ ELFSection::SHT_RELA : ELFSection::SHT_REL; + + std::string SectionName(".rel"); + if (TEW->hasRelocationAddend()) + SectionName.append("a"); + SectionName.append(S.getName()); + + return getSection(SectionName, SectionType, 0, TEW->getPrefELFAlignment()); +} + +// getGlobalELFVisibility - Returns the ELF specific visibility type unsigned ELFWriter::getGlobalELFVisibility(const GlobalValue *GV) { switch (GV->getVisibility()) { default: - assert(0 && "unknown visibility type"); + llvm_unreachable("unknown visibility type"); case GlobalValue::DefaultVisibility: return ELFSym::STV_DEFAULT; case GlobalValue::HiddenVisibility: @@ -152,134 +284,132 @@ unsigned ELFWriter::getGlobalELFVisibility(const GlobalValue *GV) { case GlobalValue::ProtectedVisibility: return ELFSym::STV_PROTECTED; } - return 0; } -unsigned ELFWriter::getGlobalELFLinkage(const GlobalValue *GV) { +// getGlobalELFBinding - Returns the ELF specific binding type +unsigned ELFWriter::getGlobalELFBinding(const GlobalValue *GV) { if (GV->hasInternalLinkage()) return ELFSym::STB_LOCAL; - if (GV->hasWeakLinkage()) + if (GV->isWeakForLinker() && !GV->hasCommonLinkage()) return ELFSym::STB_WEAK; return ELFSym::STB_GLOBAL; } -// getElfSectionFlags - Get the ELF Section Header based on the -// flags defined in ELFTargetAsmInfo. 
-unsigned ELFWriter::getElfSectionFlags(unsigned Flags) { - unsigned ElfSectionFlags = ELFSection::SHF_ALLOC; - - if (Flags & SectionFlags::Code) - ElfSectionFlags |= ELFSection::SHF_EXECINSTR; - if (Flags & SectionFlags::Writeable) - ElfSectionFlags |= ELFSection::SHF_WRITE; - if (Flags & SectionFlags::Mergeable) - ElfSectionFlags |= ELFSection::SHF_MERGE; - if (Flags & SectionFlags::TLS) - ElfSectionFlags |= ELFSection::SHF_TLS; - if (Flags & SectionFlags::Strings) - ElfSectionFlags |= ELFSection::SHF_STRINGS; - - return ElfSectionFlags; -} - -// For global symbols without a section, return the Null section as a -// placeholder -ELFSection &ELFWriter::getGlobalSymELFSection(const GlobalVariable *GV, - ELFSym &Sym) { - // If this is a declaration, the symbol does not have a section. - if (!GV->hasInitializer()) { - Sym.SectionIdx = ELFSection::SHN_UNDEF; - return getNullSection(); - } +// getGlobalELFType - Returns the ELF specific type for a global +unsigned ELFWriter::getGlobalELFType(const GlobalValue *GV) { + if (GV->isDeclaration()) + return ELFSym::STT_NOTYPE; - // Get the name and flags of the section for the global - const Section *S = TAI->SectionForGlobal(GV); - unsigned SectionType = ELFSection::SHT_PROGBITS; - unsigned SectionFlags = getElfSectionFlags(S->getFlags()); - DOUT << "Section " << S->getName() << " for global " << GV->getName() << "\n"; + if (isa<Function>(GV)) + return ELFSym::STT_FUNC; - const TargetData *TD = TM.getTargetData(); - unsigned Align = TD->getPreferredAlignment(GV); - Constant *CV = GV->getInitializer(); - - // If this global has a zero initializer, go to .bss or common section. - // Variables are part of the common block if they are zero initialized - // and allowed to be merged with other symbols. 
- if (CV->isNullValue() || isa<UndefValue>(CV)) { - SectionType = ELFSection::SHT_NOBITS; - ELFSection &ElfS = getSection(S->getName(), SectionType, SectionFlags); - if (GV->hasLinkOnceLinkage() || GV->hasWeakLinkage() || - GV->hasCommonLinkage()) { - Sym.SectionIdx = ELFSection::SHN_COMMON; - Sym.IsCommon = true; - ElfS.Align = 1; - return ElfS; - } - Sym.IsBss = true; - Sym.SectionIdx = ElfS.SectionIdx; - if (Align) ElfS.Size = (ElfS.Size + Align-1) & ~(Align-1); - ElfS.Align = std::max(ElfS.Align, Align); - return ElfS; - } - - Sym.IsConstant = true; - ELFSection &ElfS = getSection(S->getName(), SectionType, SectionFlags); - Sym.SectionIdx = ElfS.SectionIdx; - ElfS.Align = std::max(ElfS.Align, Align); - return ElfS; + return ELFSym::STT_OBJECT; } -void ELFWriter::EmitFunctionDeclaration(const Function *F) { - ELFSym GblSym(F); - GblSym.setBind(ELFSym::STB_GLOBAL); - GblSym.setType(ELFSym::STT_NOTYPE); - GblSym.setVisibility(ELFSym::STV_DEFAULT); - GblSym.SectionIdx = ELFSection::SHN_UNDEF; - SymbolList.push_back(GblSym); +// IsELFUndefSym - True if the global value must be marked as a symbol +// which points to a SHN_UNDEF section. This means that the symbol has +// no definition on the module. +static bool IsELFUndefSym(const GlobalValue *GV) { + return GV->isDeclaration() || (isa<Function>(GV)); } -void ELFWriter::EmitGlobalVar(const GlobalVariable *GV) { - unsigned SymBind = getGlobalELFLinkage(GV); - unsigned Align=0, Size=0; - ELFSym GblSym(GV); - GblSym.setBind(SymBind); - GblSym.setVisibility(getGlobalELFVisibility(GV)); - - if (GV->hasInitializer()) { - GblSym.setType(ELFSym::STT_OBJECT); - const TargetData *TD = TM.getTargetData(); - Align = TD->getPreferredAlignment(GV); - Size = TD->getTypeAllocSize(GV->getInitializer()->getType()); - GblSym.Size = Size; +// AddToSymbolList - Update the symbol lookup and If the symbol is +// private add it to PrivateSyms list, otherwise to SymbolList. 
+void ELFWriter::AddToSymbolList(ELFSym *GblSym) { + assert(GblSym->isGlobalValue() && "Symbol must be a global value"); + + const GlobalValue *GV = GblSym->getGlobalValue(); + if (GV->hasPrivateLinkage()) { + // For a private symbols, keep track of the index inside + // the private list since it will never go to the symbol + // table and won't be patched up later. + PrivateSyms.push_back(GblSym); + GblSymLookup[GV] = PrivateSyms.size()-1; } else { - GblSym.setType(ELFSym::STT_NOTYPE); + // Non private symbol are left with zero indices until + // they are patched up during the symbol table emition + // (where the indicies are created). + SymbolList.push_back(GblSym); + GblSymLookup[GV] = 0; } +} - ELFSection &GblSection = getGlobalSymELFSection(GV, GblSym); - - if (GblSym.IsCommon) { - GblSym.Value = Align; - } else if (GblSym.IsBss) { - GblSym.Value = GblSection.Size; - GblSection.Size += Size; - } else if (GblSym.IsConstant){ - // GblSym.Value should contain the symbol index inside the section, - // and all symbols should start on their required alignment boundary - GblSym.Value = (GblSection.size() + (Align-1)) & (-Align); - GblSection.emitAlignment(Align); - EmitGlobalConstant(GV->getInitializer(), GblSection); - } +// EmitGlobal - Choose the right section for global and emit it +void ELFWriter::EmitGlobal(const GlobalValue *GV) { - // Local symbols should come first on the symbol table. - if (!GV->hasPrivateLinkage()) { - if (SymBind == ELFSym::STB_LOCAL) - SymbolList.push_front(GblSym); - else - SymbolList.push_back(GblSym); + // Check if the referenced symbol is already emitted + if (GblSymLookup.find(GV) != GblSymLookup.end()) + return; + + // Handle ELF Bind, Visibility and Type for the current symbol + unsigned SymBind = getGlobalELFBinding(GV); + unsigned SymType = getGlobalELFType(GV); + bool IsUndefSym = IsELFUndefSym(GV); + + ELFSym *GblSym = IsUndefSym ? 
ELFSym::getUndefGV(GV, SymBind) + : ELFSym::getGV(GV, SymBind, SymType, getGlobalELFVisibility(GV)); + + if (!IsUndefSym) { + assert(isa<GlobalVariable>(GV) && "GV not a global variable!"); + const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV); + + // Handle special llvm globals + if (EmitSpecialLLVMGlobal(GVar)) + return; + + // Get the ELF section where this global belongs from TLOF + const MCSectionELF *S = + (const MCSectionELF *)TLOF.SectionForGlobal(GV, Mang, TM); + ELFSection &ES = + getSection(S->getSectionName(), S->getType(), S->getFlags()); + SectionKind Kind = S->getKind(); + + // The symbol align should update the section alignment if needed + const TargetData *TD = TM.getTargetData(); + unsigned Align = TD->getPreferredAlignment(GVar); + unsigned Size = TD->getTypeAllocSize(GVar->getInitializer()->getType()); + GblSym->Size = Size; + + if (S->HasCommonSymbols()) { // Symbol must go to a common section + GblSym->SectionIdx = ELFSection::SHN_COMMON; + + // A new linkonce section is created for each global in the + // common section, the default alignment is 1 and the symbol + // value contains its alignment. + ES.Align = 1; + GblSym->Value = Align; + + } else if (Kind.isBSS() || Kind.isThreadBSS()) { // Symbol goes to BSS. + GblSym->SectionIdx = ES.SectionIdx; + + // Update the size with alignment and the next object can + // start in the right offset in the section + if (Align) ES.Size = (ES.Size + Align-1) & ~(Align-1); + ES.Align = std::max(ES.Align, Align); + + // GblSym->Value should contain the virtual offset inside the section. 
+ // Virtual because the BSS space is not allocated on ELF objects + GblSym->Value = ES.Size; + ES.Size += Size; + + } else { // The symbol must go to some kind of data section + GblSym->SectionIdx = ES.SectionIdx; + + // GblSym->Value should contain the symbol offset inside the section, + // and all symbols should start on their required alignment boundary + ES.Align = std::max(ES.Align, Align); + ES.emitAlignment(Align); + GblSym->Value = ES.size(); + + // Emit the global to the data section 'ES' + EmitGlobalConstant(GVar->getInitializer(), ES); + } } + + AddToSymbolList(GblSym); } void ELFWriter::EmitGlobalConstantStruct(const ConstantStruct *CVS, @@ -305,8 +435,7 @@ void ELFWriter::EmitGlobalConstantStruct(const ConstantStruct *CVS, // Insert padding - this may include padding to increase the size of the // current field up to the ABI size (if the struct is not packed) as well // as padding to ensure that the next field starts at the right offset. - for (unsigned p=0; p < padSize; p++) - GblS.emitByte(0); + GblS.emitZeros(padSize); } assert(sizeSoFar == cvsLayout->getSizeInBytes() && "Layout of constant struct may be incorrect!"); @@ -317,65 +446,242 @@ void ELFWriter::EmitGlobalConstant(const Constant *CV, ELFSection &GblS) { unsigned Size = TD->getTypeAllocSize(CV->getType()); if (const ConstantArray *CVA = dyn_cast<ConstantArray>(CV)) { - if (CVA->isString()) { - std::string GblStr = CVA->getAsString(); - GblStr.resize(GblStr.size()-1); - GblS.emitString(GblStr); - } else { // Not a string. 
Print the values in successive locations - for (unsigned i = 0, e = CVA->getNumOperands(); i != e; ++i) - EmitGlobalConstant(CVA->getOperand(i), GblS); - } + for (unsigned i = 0, e = CVA->getNumOperands(); i != e; ++i) + EmitGlobalConstant(CVA->getOperand(i), GblS); + return; + } else if (isa<ConstantAggregateZero>(CV)) { + GblS.emitZeros(Size); return; } else if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV)) { EmitGlobalConstantStruct(CVS, GblS); return; } else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) { - uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue(); - if (CFP->getType() == Type::DoubleTy) - GblS.emitWord64(Val); - else if (CFP->getType() == Type::FloatTy) - GblS.emitWord32(Val); - else if (CFP->getType() == Type::X86_FP80Ty) { - assert(0 && "X86_FP80Ty global emission not implemented"); - } else if (CFP->getType() == Type::PPC_FP128Ty) - assert(0 && "PPC_FP128Ty global emission not implemented"); + APInt Val = CFP->getValueAPF().bitcastToAPInt(); + if (CFP->getType()->isDoubleTy()) + GblS.emitWord64(Val.getZExtValue()); + else if (CFP->getType()->isFloatTy()) + GblS.emitWord32(Val.getZExtValue()); + else if (CFP->getType()->isX86_FP80Ty()) { + unsigned PadSize = TD->getTypeAllocSize(CFP->getType())- + TD->getTypeStoreSize(CFP->getType()); + GblS.emitWordFP80(Val.getRawData(), PadSize); + } else if (CFP->getType()->isPPC_FP128Ty()) + llvm_unreachable("PPC_FP128Ty global emission not implemented"); return; } else if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) { - if (Size == 4) + if (Size == 1) + GblS.emitByte(CI->getZExtValue()); + else if (Size == 2) + GblS.emitWord16(CI->getZExtValue()); + else if (Size == 4) GblS.emitWord32(CI->getZExtValue()); - else if (Size == 8) - GblS.emitWord64(CI->getZExtValue()); - else - assert(0 && "LargeInt global emission not implemented"); + else + EmitGlobalConstantLargeInt(CI, GblS); return; } else if (const ConstantVector *CP = dyn_cast<ConstantVector>(CV)) { const VectorType 
*PTy = CP->getType(); for (unsigned I = 0, E = PTy->getNumElements(); I < E; ++I) EmitGlobalConstant(CP->getOperand(I), GblS); return; + } else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) { + // Resolve a constant expression which returns a (Constant, Offset) + // pair. If 'Res.first' is a GlobalValue, emit a relocation with + // the offset 'Res.second', otherwise emit a global constant like + // it is always done for non-constant-expression types. + CstExprResTy Res = ResolveConstantExpr(CE); + const Constant *Op = Res.first; + + if (isa<GlobalValue>(Op)) + EmitGlobalDataRelocation(cast<const GlobalValue>(Op), + TD->getTypeAllocSize(Op->getType()), + GblS, Res.second); + else + EmitGlobalConstant(Op, GblS); + + return; + } else if (CV->getType()->getTypeID() == Type::PointerTyID) { + // Fill the data entry with zeros or emit a relocation entry + if (isa<ConstantPointerNull>(CV)) + GblS.emitZeros(Size); + else + EmitGlobalDataRelocation(cast<const GlobalValue>(CV), + Size, GblS); + return; + } else if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) { + // This is a constant address for a global variable or function and + // therefore must be referenced using a relocation entry. + EmitGlobalDataRelocation(GV, Size, GblS); + return; + } + + std::string msg; + raw_string_ostream ErrorMsg(msg); + ErrorMsg << "Constant unimp for type: " << *CV->getType(); + llvm_report_error(ErrorMsg.str()); +} + +// ResolveConstantExpr - Resolve the constant expression until it stops +// yielding other constant expressions. 
+CstExprResTy ELFWriter::ResolveConstantExpr(const Constant *CV) { + const TargetData *TD = TM.getTargetData(); + + // There isn't a constant expression nested inside others anymore + if (!isa<ConstantExpr>(CV)) + return std::make_pair(CV, 0); + + const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV); + switch (CE->getOpcode()) { + case Instruction::BitCast: + return ResolveConstantExpr(CE->getOperand(0)); + + case Instruction::GetElementPtr: { + const Constant *ptrVal = CE->getOperand(0); + SmallVector<Value*, 8> idxVec(CE->op_begin()+1, CE->op_end()); + int64_t Offset = TD->getIndexedOffset(ptrVal->getType(), &idxVec[0], + idxVec.size()); + return std::make_pair(ptrVal, Offset); + } + case Instruction::IntToPtr: { + Constant *Op = CE->getOperand(0); + Op = ConstantExpr::getIntegerCast(Op, TD->getIntPtrType(CV->getContext()), + false/*ZExt*/); + return ResolveConstantExpr(Op); + } + case Instruction::PtrToInt: { + Constant *Op = CE->getOperand(0); + const Type *Ty = CE->getType(); + + // We can emit the pointer value into this slot if the slot is an + // integer slot greater or equal to the size of the pointer. + if (TD->getTypeAllocSize(Ty) == TD->getTypeAllocSize(Op->getType())) + return ResolveConstantExpr(Op); + + llvm_unreachable("Integer size less then pointer size"); + } + case Instruction::Add: + case Instruction::Sub: { + // Only handle cases where there's a constant expression with GlobalValue + // as first operand and ConstantInt as second, which are the cases we can + // solve directly using a relocation entry. 
GlobalValue=Op0, CstInt=Op1 + // 1) Instruction::Add => (global) + CstInt + // 2) Instruction::Sub => (global) + -CstInt + const Constant *Op0 = CE->getOperand(0); + const Constant *Op1 = CE->getOperand(1); + assert(isa<ConstantInt>(Op1) && "Op1 must be a ConstantInt"); + + CstExprResTy Res = ResolveConstantExpr(Op0); + assert(isa<GlobalValue>(Res.first) && "Op0 must be a GlobalValue"); + + const APInt &RHS = cast<ConstantInt>(Op1)->getValue(); + switch (CE->getOpcode()) { + case Instruction::Add: + return std::make_pair(Res.first, RHS.getSExtValue()); + case Instruction::Sub: + return std::make_pair(Res.first, (-RHS).getSExtValue()); + } + } + } + + std::string msg(CE->getOpcodeName()); + raw_string_ostream ErrorMsg(msg); + ErrorMsg << ": Unsupported ConstantExpr type"; + llvm_report_error(ErrorMsg.str()); + + return std::make_pair(CV, 0); // silence warning +} + +void ELFWriter::EmitGlobalDataRelocation(const GlobalValue *GV, unsigned Size, + ELFSection &GblS, int64_t Offset) { + // Create the relocation entry for the global value + MachineRelocation MR = + MachineRelocation::getGV(GblS.getCurrentPCOffset(), + TEW->getAbsoluteLabelMachineRelTy(), + const_cast<GlobalValue*>(GV), + Offset); + + // Fill the data entry with zeros + GblS.emitZeros(Size); + + // Add the relocation entry for the current data section + GblS.addRelocation(MR); +} + +void ELFWriter::EmitGlobalConstantLargeInt(const ConstantInt *CI, + ELFSection &S) { + const TargetData *TD = TM.getTargetData(); + unsigned BitWidth = CI->getBitWidth(); + assert(isPowerOf2_32(BitWidth) && + "Non-power-of-2-sized integers not handled!"); + + const uint64_t *RawData = CI->getValue().getRawData(); + uint64_t Val = 0; + for (unsigned i = 0, e = BitWidth / 64; i != e; ++i) { + Val = (TD->isBigEndian()) ? RawData[e - i - 1] : RawData[i]; + S.emitWord64(Val); } - assert(0 && "unknown global constant"); } +/// EmitSpecialLLVMGlobal - Check to see if the specified global is a +/// special global used by LLVM. 
If so, emit it and return true, otherwise +/// do nothing and return false. +bool ELFWriter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) { + if (GV->getName() == "llvm.used") + llvm_unreachable("not implemented yet"); + + // Ignore debug and non-emitted data. This handles llvm.compiler.used. + if (GV->getSection() == "llvm.metadata" || + GV->hasAvailableExternallyLinkage()) + return true; + + if (!GV->hasAppendingLinkage()) return false; + + assert(GV->hasInitializer() && "Not a special LLVM global!"); + + const TargetData *TD = TM.getTargetData(); + unsigned Align = TD->getPointerPrefAlignment(); + if (GV->getName() == "llvm.global_ctors") { + ELFSection &Ctor = getCtorSection(); + Ctor.emitAlignment(Align); + EmitXXStructorList(GV->getInitializer(), Ctor); + return true; + } + + if (GV->getName() == "llvm.global_dtors") { + ELFSection &Dtor = getDtorSection(); + Dtor.emitAlignment(Align); + EmitXXStructorList(GV->getInitializer(), Dtor); + return true; + } + + return false; +} + +/// EmitXXStructorList - Emit the ctor or dtor list. This just emits out the +/// function pointers, ignoring the init priority. +void ELFWriter::EmitXXStructorList(Constant *List, ELFSection &Xtor) { + // Should be an array of '{ int, void ()* }' structs. The first value is the + // init priority, which we ignore. + if (!isa<ConstantArray>(List)) return; + ConstantArray *InitList = cast<ConstantArray>(List); + for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) + if (ConstantStruct *CS = dyn_cast<ConstantStruct>(InitList->getOperand(i))){ + if (CS->getNumOperands() != 2) return; // Not array of 2-element structs. + + if (CS->getOperand(1)->isNullValue()) + return; // Found a null terminator, exit printing. + // Emit the function pointer. + EmitGlobalConstant(CS->getOperand(1), Xtor); + } +} bool ELFWriter::runOnMachineFunction(MachineFunction &MF) { - // Nothing to do here, this is all done through the MCE object above. 
+ // Nothing to do here, this is all done through the ElfCE object above. return false; } /// doFinalization - Now that the module has been completely processed, emit /// the ELF file to 'O'. bool ELFWriter::doFinalization(Module &M) { - /// FIXME: This should be removed when moving to ObjectCodeEmiter. Since the - /// current ELFCodeEmiter uses CurrBuff, ... it doesn't update S.Data - /// vector size for .text sections, so this is a quick dirty fix - ELFSection &TS = getTextSection(); - if (TS.Size) { - BinaryData &BD = TS.getData(); - for (unsigned e=0; e<TS.Size; ++e) - BD.push_back(BD[e]); - } - // Emit .data section placeholder getDataSection(); @@ -384,57 +690,34 @@ bool ELFWriter::doFinalization(Module &M) { // Build and emit data, bss and "common" sections. for (Module::global_iterator I = M.global_begin(), E = M.global_end(); - I != E; ++I) { - EmitGlobalVar(I); - GblSymLookup[I] = 0; - } + I != E; ++I) + EmitGlobal(I); // Emit all pending globals - // TODO: this should be done only for referenced symbols - for (SetVector<GlobalValue*>::const_iterator I = PendingGlobals.begin(), - E = PendingGlobals.end(); I != E; ++I) { + for (PendingGblsIter I = PendingGlobals.begin(), E = PendingGlobals.end(); + I != E; ++I) + EmitGlobal(*I); - // No need to emit the symbol again - if (GblSymLookup.find(*I) != GblSymLookup.end()) - continue; - - if (GlobalVariable *GV = dyn_cast<GlobalVariable>(*I)) { - EmitGlobalVar(GV); - } else if (Function *F = dyn_cast<Function>(*I)) { - // If function is not in GblSymLookup, it doesn't have a body, - // so emit the symbol as a function declaration (no section associated) - EmitFunctionDeclaration(F); - } else { - assert("unknown howto handle pending global"); - } - GblSymLookup[*I] = 0; - } + // Emit all pending externals + for (PendingExtsIter I = PendingExternals.begin(), E = PendingExternals.end(); + I != E; ++I) + SymbolList.push_back(ELFSym::getExtSym(*I)); // Emit non-executable stack note - if 
(TAI->getNonexecutableStackDirective()) + if (MAI->getNonexecutableStackDirective()) getNonExecStackSection(); - // Emit a symbol for each section created until now - for (std::map<std::string, ELFSection*>::iterator I = SectionLookup.begin(), - E = SectionLookup.end(); I != E; ++I) { - ELFSection *ES = I->second; - - // Skip null section - if (ES->SectionIdx == 0) continue; - - ELFSym SectionSym(0); - SectionSym.SectionIdx = ES->SectionIdx; - SectionSym.Size = 0; - SectionSym.setBind(ELFSym::STB_LOCAL); - SectionSym.setType(ELFSym::STT_SECTION); - SectionSym.setVisibility(ELFSym::STV_DEFAULT); - - // Local symbols go in the list front - SymbolList.push_front(SectionSym); + // Emit a symbol for each section created until now, skip null section + for (unsigned i = 1, e = SectionList.size(); i < e; ++i) { + ELFSection &ES = *SectionList[i]; + ELFSym *SectionSym = ELFSym::getSectionSym(); + SectionSym->SectionIdx = ES.SectionIdx; + SymbolList.push_back(SectionSym); + ES.Sym = SymbolList.back(); } // Emit string table - EmitStringTable(); + EmitStringTable(M.getModuleIdentifier()); // Emit the symbol table now, if non-empty. EmitSymbolTable(); @@ -448,77 +731,106 @@ bool ELFWriter::doFinalization(Module &M) { // Dump the sections and section table to the .o file. OutputSectionsAndSectionTable(); - // We are done with the abstract symbols. - SectionList.clear(); - NumSections = 0; - - // Release the name mangler object. 
- delete Mang; Mang = 0; return false; } +// RelocateField - Patch relocatable field with 'Offset' in 'BO' +// using a 'Value' of known 'Size' +void ELFWriter::RelocateField(BinaryObject &BO, uint32_t Offset, + int64_t Value, unsigned Size) { + if (Size == 32) + BO.fixWord32(Value, Offset); + else if (Size == 64) + BO.fixWord64(Value, Offset); + else + llvm_unreachable("don't know howto patch relocatable field"); +} + /// EmitRelocations - Emit relocations void ELFWriter::EmitRelocations() { + // True if the target uses the relocation entry to hold the addend, + // otherwise the addend is written directly to the relocatable field. + bool HasRelA = TEW->hasRelocationAddend(); + // Create Relocation sections for each section which needs it. - for (std::list<ELFSection>::iterator I = SectionList.begin(), - E = SectionList.end(); I != E; ++I) { + for (unsigned i=0, e=SectionList.size(); i != e; ++i) { + ELFSection &S = *SectionList[i]; // This section does not have relocations - if (!I->hasRelocations()) continue; - - // Get the relocation section for section 'I' - bool HasRelA = TEW->hasRelocationAddend(); - ELFSection &RelSec = getRelocSection(I->getName(), HasRelA, - TEW->getPrefELFAlignment()); + if (!S.hasRelocations()) continue; + ELFSection &RelSec = getRelocSection(S); // 'Link' - Section hdr idx of the associated symbol table // 'Info' - Section hdr idx of the section to which the relocation applies ELFSection &SymTab = getSymbolTableSection(); RelSec.Link = SymTab.SectionIdx; - RelSec.Info = I->SectionIdx; + RelSec.Info = S.SectionIdx; RelSec.EntSize = TEW->getRelocationEntrySize(); // Get the relocations from Section - std::vector<MachineRelocation> Relos = I->getRelocations(); + std::vector<MachineRelocation> Relos = S.getRelocations(); for (std::vector<MachineRelocation>::iterator MRI = Relos.begin(), MRE = Relos.end(); MRI != MRE; ++MRI) { MachineRelocation &MR = *MRI; - // Offset from the start of the section containing the symbol - unsigned Offset = 
MR.getMachineCodeOffset(); + // Relocatable field offset from the section start + unsigned RelOffset = MR.getMachineCodeOffset(); // Symbol index in the symbol table unsigned SymIdx = 0; - // Target specific ELF relocation type + // Target specific relocation field type and size unsigned RelType = TEW->getRelocationType(MR.getRelocationType()); - - // Constant addend used to compute the value to be stored - // into the relocatable field + unsigned RelTySize = TEW->getRelocationTySize(RelType); int64_t Addend = 0; // There are several machine relocations types, and each one of // them needs a different approach to retrieve the symbol table index. if (MR.isGlobalValue()) { const GlobalValue *G = MR.getGlobalValue(); + int64_t GlobalOffset = MR.getConstantVal(); SymIdx = GblSymLookup[G]; - Addend = TEW->getAddendForRelTy(RelType); + if (G->hasPrivateLinkage()) { + // If the target uses a section offset in the relocation: + // SymIdx + Addend = section sym for global + section offset + unsigned SectionIdx = PrivateSyms[SymIdx]->SectionIdx; + Addend = PrivateSyms[SymIdx]->Value + GlobalOffset; + SymIdx = SectionList[SectionIdx]->getSymbolTableIndex(); + } else { + Addend = TEW->getDefaultAddendForRelTy(RelType, GlobalOffset); + } + } else if (MR.isExternalSymbol()) { + const char *ExtSym = MR.getExternalSymbol(); + SymIdx = ExtSymLookup[ExtSym]; + Addend = TEW->getDefaultAddendForRelTy(RelType); } else { + // Get the symbol index for the section symbol unsigned SectionIdx = MR.getConstantVal(); - // TODO: use a map for this. 
- for (std::list<ELFSym>::iterator I = SymbolList.begin(), - E = SymbolList.end(); I != E; ++I) - if ((SectionIdx == I->SectionIdx) && - (I->getType() == ELFSym::STT_SECTION)) { - SymIdx = I->SymTabIdx; - break; - } - Addend = (uint64_t)MR.getResultPointer(); + SymIdx = SectionList[SectionIdx]->getSymbolTableIndex(); + + // The symbol offset inside the section + int64_t SymOffset = (int64_t)MR.getResultPointer(); + + // For pc relative relocations where symbols are defined in the same + // section they are referenced, ignore the relocation entry and patch + // the relocatable field with the symbol offset directly. + if (S.SectionIdx == SectionIdx && TEW->isPCRelativeRel(RelType)) { + int64_t Value = TEW->computeRelocation(SymOffset, RelOffset, RelType); + RelocateField(S, RelOffset, Value, RelTySize); + continue; + } + + Addend = TEW->getDefaultAddendForRelTy(RelType, SymOffset); } + // The target without addend on the relocation symbol must be + // patched in the relocation place itself to contain the addend + // otherwise write zeros to make sure there is no garbage there + RelocateField(S, RelOffset, HasRelA ? 
0 : Addend, RelTySize); + // Get the relocation entry and emit to the relocation section - ELFRelocation Rel(Offset, SymIdx, RelType, HasRelA, Addend); + ELFRelocation Rel(RelOffset, SymIdx, RelType, HasRelA, Addend); EmitRelocation(RelSec, Rel, HasRelA); } } @@ -554,7 +866,7 @@ void ELFWriter::EmitSymbol(BinaryObject &SymbolTable, ELFSym &Sym) { /// EmitSectionHeader - Write section 'Section' header in 'SHdrTab' /// Section Header Table -void ELFWriter::EmitSectionHeader(BinaryObject &SHdrTab, +void ELFWriter::EmitSectionHeader(BinaryObject &SHdrTab, const ELFSection &SHdr) { SHdrTab.emitWord32(SHdr.NameIdx); SHdrTab.emitWord32(SHdr.Type); @@ -581,27 +893,30 @@ void ELFWriter::EmitSectionHeader(BinaryObject &SHdrTab, /// EmitStringTable - If the current symbol table is non-empty, emit the string /// table for it -void ELFWriter::EmitStringTable() { +void ELFWriter::EmitStringTable(const std::string &ModuleName) { if (!SymbolList.size()) return; // Empty symbol table. ELFSection &StrTab = getStringTableSection(); // Set the zero'th symbol to a null byte, as required. StrTab.emitByte(0); - // Walk on the symbol list and write symbol names into the - // string table. + // Walk on the symbol list and write symbol names into the string table. unsigned Index = 1; - for (std::list<ELFSym>::iterator I = SymbolList.begin(), - E = SymbolList.end(); I != E; ++I) { + for (ELFSymIter I=SymbolList.begin(), E=SymbolList.end(); I != E; ++I) { + ELFSym &Sym = *(*I); - // Use the name mangler to uniquify the LLVM symbol. 
std::string Name; - if (I->GV) Name.append(Mang->getValueName(I->GV)); + if (Sym.isGlobalValue()) + Name.append(Mang->getMangledName(Sym.getGlobalValue())); + else if (Sym.isExternalSym()) + Name.append(Sym.getExternalSymbol()); + else if (Sym.isFileType()) + Name.append(ModuleName); if (Name.empty()) { - I->NameIdx = 0; + Sym.NameIdx = 0; } else { - I->NameIdx = Index; + Sym.NameIdx = Index; StrTab.emitString(Name); // Keep track of the number of bytes emitted to this section. @@ -612,11 +927,38 @@ void ELFWriter::EmitStringTable() { StrTab.Size = Index; } +// SortSymbols - On the symbol table local symbols must come before +// all other symbols with non-local bindings. The return value is +// the position of the first non local symbol. +unsigned ELFWriter::SortSymbols() { + unsigned FirstNonLocalSymbol; + std::vector<ELFSym*> LocalSyms, OtherSyms; + + for (ELFSymIter I=SymbolList.begin(), E=SymbolList.end(); I != E; ++I) { + if ((*I)->isLocalBind()) + LocalSyms.push_back(*I); + else + OtherSyms.push_back(*I); + } + SymbolList.clear(); + FirstNonLocalSymbol = LocalSyms.size(); + + for (unsigned i = 0; i < FirstNonLocalSymbol; ++i) + SymbolList.push_back(LocalSyms[i]); + + for (ELFSymIter I=OtherSyms.begin(), E=OtherSyms.end(); I != E; ++I) + SymbolList.push_back(*I); + + LocalSyms.clear(); + OtherSyms.clear(); + + return FirstNonLocalSymbol; +} + /// EmitSymbolTable - Emit the symbol table itself. void ELFWriter::EmitSymbolTable() { if (!SymbolList.size()) return; // Empty symbol table. - unsigned FirstNonLocalSymbol = 1; // Now that we have emitted the string table and know the offset into the // string table of each symbol, emit the symbol table itself. ELFSection &SymTab = getSymbolTableSection(); @@ -628,30 +970,27 @@ void ELFWriter::EmitSymbolTable() { // Size of each symtab entry. 
SymTab.EntSize = TEW->getSymTabEntrySize(); - // The first entry in the symtab is the null symbol - ELFSym NullSym = ELFSym(0); - EmitSymbol(SymTab, NullSym); + // Reorder the symbol table with local symbols first! + unsigned FirstNonLocalSymbol = SortSymbols(); - // Emit all the symbols to the symbol table. Skip the null - // symbol, cause it's emitted already - unsigned Index = 1; - for (std::list<ELFSym>::iterator I = SymbolList.begin(), - E = SymbolList.end(); I != E; ++I, ++Index) { - // Keep track of the first non-local symbol - if (I->getBind() == ELFSym::STB_LOCAL) - FirstNonLocalSymbol++; + // Emit all the symbols to the symbol table. + for (unsigned i = 0, e = SymbolList.size(); i < e; ++i) { + ELFSym &Sym = *SymbolList[i]; // Emit symbol to the symbol table - EmitSymbol(SymTab, *I); + EmitSymbol(SymTab, Sym); - // Record the symbol table index for each global value - if (I->GV) - GblSymLookup[I->GV] = Index; + // Record the symbol table index for each symbol + if (Sym.isGlobalValue()) + GblSymLookup[Sym.getGlobalValue()] = i; + else if (Sym.isExternalSym()) + ExtSymLookup[Sym.getExternalSymbol()] = i; // Keep track on the symbol index into the symbol table - I->SymTabIdx = Index; + Sym.SymTabIdx = i; } + // One greater than the symbol table index of the last local symbol SymTab.Info = FirstNonLocalSymbol; SymTab.Size = SymTab.size(); } @@ -671,15 +1010,15 @@ void ELFWriter::EmitSectionTableStringTable() { // the string table. unsigned Index = 0; - for (std::list<ELFSection>::iterator I = SectionList.begin(), - E = SectionList.end(); I != E; ++I) { + for (ELFSectionIter I=SectionList.begin(), E=SectionList.end(); I != E; ++I) { + ELFSection &S = *(*I); // Set the index into the table. Note if we have lots of entries with // common suffixes, we could memoize them here if we cared. - I->NameIdx = Index; - SHStrTab.emitString(I->getName()); + S.NameIdx = Index; + SHStrTab.emitString(S.getName()); // Keep track of the number of bytes emitted to this section. 
- Index += I->getName().size()+1; + Index += S.getName().size()+1; } // Set the size of .shstrtab now that we know what it is. @@ -694,29 +1033,24 @@ void ELFWriter::OutputSectionsAndSectionTable() { // Pass #1: Compute the file offset for each section. size_t FileOff = ElfHdr.size(); // File header first. - // Adjust alignment of all section if needed. - for (std::list<ELFSection>::iterator I = SectionList.begin(), - E = SectionList.end(); I != E; ++I) { - - // Section idx 0 has 0 offset - if (!I->SectionIdx) - continue; - - if (!I->size()) { - I->Offset = FileOff; + // Adjust alignment of all section if needed, skip the null section. + for (unsigned i=1, e=SectionList.size(); i < e; ++i) { + ELFSection &ES = *SectionList[i]; + if (!ES.size()) { + ES.Offset = FileOff; continue; } // Update Section size - if (!I->Size) - I->Size = I->size(); + if (!ES.Size) + ES.Size = ES.size(); // Align FileOff to whatever the alignment restrictions of the section are. - if (I->Align) - FileOff = (FileOff+I->Align-1) & ~(I->Align-1); + if (ES.Align) + FileOff = (FileOff+ES.Align-1) & ~(ES.Align-1); - I->Offset = FileOff; - FileOff += I->Size; + ES.Offset = FileOff; + FileOff += ES.Size; } // Align Section Header. @@ -740,11 +1074,11 @@ void ELFWriter::OutputSectionsAndSectionTable() { BinaryObject SHdrTable(isLittleEndian, is64Bit); // Emit all of sections to the file and build the section header table. 
- while (!SectionList.empty()) { - ELFSection &S = *SectionList.begin(); - DOUT << "SectionIdx: " << S.SectionIdx << ", Name: " << S.getName() - << ", Size: " << S.Size << ", Offset: " << S.Offset - << ", SectionData Size: " << S.size() << "\n"; + for (ELFSectionIter I=SectionList.begin(), E=SectionList.end(); I != E; ++I) { + ELFSection &S = *(*I); + DEBUG(errs() << "SectionIdx: " << S.SectionIdx << ", Name: " << S.getName() + << ", Size: " << S.Size << ", Offset: " << S.Offset + << ", SectionData Size: " << S.size() << "\n"); // Align FileOff to whatever the alignment restrictions of the section are. if (S.size()) { @@ -758,7 +1092,6 @@ void ELFWriter::OutputSectionsAndSectionTable() { } EmitSectionHeader(SHdrTable, S); - SectionList.pop_front(); } // Align output for the section table. diff --git a/lib/CodeGen/ELFWriter.h b/lib/CodeGen/ELFWriter.h index bab118c..b61b484 100644 --- a/lib/CodeGen/ELFWriter.h +++ b/lib/CodeGen/ELFWriter.h @@ -16,23 +16,35 @@ #include "llvm/ADT/SetVector.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include <list> #include <map> namespace llvm { class BinaryObject; class Constant; + class ConstantInt; class ConstantStruct; class ELFCodeEmitter; + class ELFRelocation; + class ELFSection; + struct ELFSym; class GlobalVariable; + class JITDebugRegisterer; class Mangler; class MachineCodeEmitter; - class TargetAsmInfo; + class MachineConstantPoolEntry; + class ObjectCodeEmitter; + class MCAsmInfo; class TargetELFWriterInfo; + class TargetLoweringObjectFile; class raw_ostream; - class ELFSection; - class ELFSym; - class ELFRelocation; + class SectionKind; + class MCContext; + + typedef std::vector<ELFSym*>::iterator ELFSymIter; + typedef std::vector<ELFSection*>::iterator ELFSectionIter; + typedef SetVector<const GlobalValue*>::const_iterator PendingGblsIter; + typedef SetVector<const char *>::const_iterator PendingExtsIter; + typedef std::pair<const Constant *, int64_t> CstExprResTy; /// ELFWriter - This class implements the common 
target-independent code for /// writing ELF files. Targets should derive a class from this to @@ -40,18 +52,18 @@ namespace llvm { /// class ELFWriter : public MachineFunctionPass { friend class ELFCodeEmitter; + friend class JITDebugRegisterer; public: static char ID; - MachineCodeEmitter &getMachineCodeEmitter() const { - return *(MachineCodeEmitter*)MCE; + /// Return the ELFCodeEmitter as an instance of ObjectCodeEmitter + ObjectCodeEmitter *getObjectCodeEmitter() { + return reinterpret_cast<ObjectCodeEmitter*>(ElfCE); } ELFWriter(raw_ostream &O, TargetMachine &TM); ~ELFWriter(); - typedef std::vector<unsigned char> DataBuffer; - protected: /// Output stream to send the resultant object file to. raw_ostream &O; @@ -59,6 +71,9 @@ namespace llvm { /// Target machine description. TargetMachine &TM; + /// Context object for machine code objects. + MCContext &OutContext; + /// Target Elf Writer description. const TargetELFWriterInfo *TEW; @@ -67,11 +82,15 @@ namespace llvm { /// MCE - The MachineCodeEmitter object that we are exposing to emit machine /// code for functions to the .o file. - ELFCodeEmitter *MCE; + ELFCodeEmitter *ElfCE; - /// TAI - Target Asm Info, provide information about section names for + /// TLOF - Target Lowering Object File, provide section names for globals + /// and other object file specific stuff + const TargetLoweringObjectFile &TLOF; + + /// MAI - Target Asm Info, provide information about section names for /// globals and other target specific stuff. - const TargetAsmInfo *TAI; + const MCAsmInfo *MAI; //===------------------------------------------------------------------===// // Properties inferred automatically from the target machine. @@ -95,59 +114,49 @@ namespace llvm { BinaryObject ElfHdr; /// SectionList - This is the list of sections that we have emitted to the - /// file. Once the file has been completely built, the section header table + /// file. 
Once the file has been completely built, the section header table /// is constructed from this info. - std::list<ELFSection> SectionList; + std::vector<ELFSection*> SectionList; unsigned NumSections; // Always = SectionList.size() /// SectionLookup - This is a mapping from section name to section number in - /// the SectionList. + /// the SectionList. Used to quickly gather the Section Index from MAI names std::map<std::string, ELFSection*> SectionLookup; + /// PendingGlobals - Globals not processed as symbols yet. + SetVector<const GlobalValue*> PendingGlobals; + /// GblSymLookup - This is a mapping from global value to a symbol index - /// in the symbol table. This is useful since relocations symbol references - /// must be quickly mapped to a symbol table index + /// in the symbol table or private symbols list. This is useful since reloc + /// symbol references must be quickly mapped to their indices on the lists. std::map<const GlobalValue*, uint32_t> GblSymLookup; - /// SymbolList - This is the list of symbols emitted to the symbol table - /// Local symbols go to the front and Globals to the back. - std::list<ELFSym> SymbolList; - - /// PendingGlobals - List of externally defined symbols that we have been - /// asked to emit, but have not seen a reference to. When a reference - /// is seen, the symbol will move from this list to the SymbolList. - SetVector<GlobalValue*> PendingGlobals; - - // Remove tab from section name prefix. This is necessary becase TAI - // sometimes return a section name prefixed with a "\t" char. This is - // a little bit dirty. FIXME: find a better approach, maybe add more - // methods to TAI to get the clean name? 
- void fixNameForSection(std::string &Name) { - size_t Pos = Name.find("\t"); - if (Pos != std::string::npos) - Name.erase(Pos, 1); - - Pos = Name.find(".section "); - if (Pos != std::string::npos) - Name.erase(Pos, 9); - - Pos = Name.find("\n"); - if (Pos != std::string::npos) - Name.erase(Pos, 1); - } + /// PendingExternals - Externals not processed as symbols yet. + SetVector<const char *> PendingExternals; + + /// ExtSymLookup - This is a mapping from externals to a symbol index + /// in the symbol table list. This is useful since reloc symbol references + /// must be quickly mapped to their symbol table indices. + std::map<const char *, uint32_t> ExtSymLookup; + + /// SymbolList - This is the list of symbols emitted to the symbol table. + /// When the SymbolList is finally built, local symbols must be placed in + /// the beginning while non-locals at the end. + std::vector<ELFSym*> SymbolList; + + /// PrivateSyms - Record private symbols, every symbol here must never be + /// present in the SymbolList. + std::vector<ELFSym*> PrivateSyms; /// getSection - Return the section with the specified name, creating a new /// section if one does not already exist. ELFSection &getSection(const std::string &Name, unsigned Type, unsigned Flags = 0, unsigned Align = 0) { - std::string SectionName(Name); - fixNameForSection(SectionName); - - ELFSection *&SN = SectionLookup[SectionName]; + ELFSection *&SN = SectionLookup[Name]; if (SN) return *SN; - SectionList.push_back(ELFSection(SectionName, isLittleEndian, is64Bit)); - SN = &SectionList.back(); + SectionList.push_back(new ELFSection(Name, isLittleEndian, is64Bit)); + SN = SectionList.back(); SN->SectionIdx = NumSections++; SN->Type = Type; SN->Flags = Flags; @@ -156,37 +165,6 @@ namespace llvm { return *SN; } - /// TODO: support mangled names here to emit the right .text section - /// for c++ object files. 
- ELFSection &getTextSection() { - return getSection(".text", ELFSection::SHT_PROGBITS, - ELFSection::SHF_EXECINSTR | ELFSection::SHF_ALLOC); - } - - /// Get jump table section on the section name returned by TAI - ELFSection &getJumpTableSection(std::string SName, unsigned Align) { - return getSection(SName, ELFSection::SHT_PROGBITS, - ELFSection::SHF_ALLOC, Align); - } - - /// Get a constant pool section based on the section name returned by TAI - ELFSection &getConstantPoolSection(std::string SName, unsigned Align) { - return getSection(SName, ELFSection::SHT_PROGBITS, - ELFSection::SHF_MERGE | ELFSection::SHF_ALLOC, Align); - } - - /// Return the relocation section of section 'S'. 'RelA' is true - /// if the relocation section contains entries with addends. - ELFSection &getRelocSection(std::string SName, bool RelA, unsigned Align) { - std::string RelSName(".rel"); - unsigned SHdrTy = RelA ? ELFSection::SHT_RELA : ELFSection::SHT_REL; - - if (RelA) RelSName.append("a"); - RelSName.append(SName); - - return getSection(RelSName, SHdrTy, 0, Align); - } - ELFSection &getNonExecStackSection() { return getSection(".note.GNU-stack", ELFSection::SHT_PROGBITS, 0, 1); } @@ -203,24 +181,38 @@ namespace llvm { return getSection(".shstrtab", ELFSection::SHT_STRTAB, 0, 1); } - ELFSection &getDataSection() { - return getSection(".data", ELFSection::SHT_PROGBITS, - ELFSection::SHF_WRITE | ELFSection::SHF_ALLOC, 4); - } - - ELFSection &getBSSSection() { - return getSection(".bss", ELFSection::SHT_NOBITS, - ELFSection::SHF_WRITE | ELFSection::SHF_ALLOC, 4); - } - ELFSection &getNullSection() { return getSection("", ELFSection::SHT_NULL, 0); } + ELFSection &getDataSection(); + ELFSection &getBSSSection(); + ELFSection &getCtorSection(); + ELFSection &getDtorSection(); + ELFSection &getJumpTableSection(); + ELFSection &getConstantPoolSection(MachineConstantPoolEntry &CPE); + ELFSection &getTextSection(Function *F); + ELFSection &getRelocSection(ELFSection &S); + // Helpers for 
obtaining ELF specific info. - unsigned getGlobalELFLinkage(const GlobalValue *GV); + unsigned getGlobalELFBinding(const GlobalValue *GV); + unsigned getGlobalELFType(const GlobalValue *GV); unsigned getGlobalELFVisibility(const GlobalValue *GV); - unsigned getElfSectionFlags(unsigned Flags); + + // AddPendingGlobalSymbol - Add a global to be processed and to + // the global symbol lookup, use a zero index because the table + // index will be determined later. + void AddPendingGlobalSymbol(const GlobalValue *GV, + bool AddToLookup = false); + + // AddPendingExternalSymbol - Add the external to be processed + // and to the external symbol lookup, use a zero index because + // the symbol table index will be determined later. + void AddPendingExternalSymbol(const char *External); + + // AddToSymbolList - Update the symbol lookup and If the symbol is + // private add it to PrivateSyms list, otherwise to SymbolList. + void AddToSymbolList(ELFSym *GblSym); // As we complete the ELF file, we need to update fields in the ELF header // (e.g. the location of the section table). These members keep track of @@ -231,20 +223,27 @@ namespace llvm { unsigned ELFHdr_e_shnum_Offset; // e_shnum in ELF header. 
private: - void EmitFunctionDeclaration(const Function *F); - void EmitGlobalVar(const GlobalVariable *GV); + void EmitGlobal(const GlobalValue *GV); void EmitGlobalConstant(const Constant *C, ELFSection &GblS); void EmitGlobalConstantStruct(const ConstantStruct *CVS, ELFSection &GblS); - ELFSection &getGlobalSymELFSection(const GlobalVariable *GV, ELFSym &Sym); + void EmitGlobalConstantLargeInt(const ConstantInt *CI, ELFSection &S); + void EmitGlobalDataRelocation(const GlobalValue *GV, unsigned Size, + ELFSection &GblS, int64_t Offset = 0); + bool EmitSpecialLLVMGlobal(const GlobalVariable *GV); + void EmitXXStructorList(Constant *List, ELFSection &Xtor); void EmitRelocations(); void EmitRelocation(BinaryObject &RelSec, ELFRelocation &Rel, bool HasRelA); void EmitSectionHeader(BinaryObject &SHdrTab, const ELFSection &SHdr); void EmitSectionTableStringTable(); void EmitSymbol(BinaryObject &SymbolTable, ELFSym &Sym); void EmitSymbolTable(); - void EmitStringTable(); + void EmitStringTable(const std::string &ModuleName); void OutputSectionsAndSectionTable(); + void RelocateField(BinaryObject &BO, uint32_t Offset, int64_t Value, + unsigned Size); + unsigned SortSymbols(); + CstExprResTy ResolveConstantExpr(const Constant *CV); }; } diff --git a/lib/CodeGen/ExactHazardRecognizer.cpp b/lib/CodeGen/ExactHazardRecognizer.cpp new file mode 100644 index 0000000..4f32c2b --- /dev/null +++ b/lib/CodeGen/ExactHazardRecognizer.cpp @@ -0,0 +1,160 @@ +//===----- ExactHazardRecognizer.cpp - hazard recognizer -------- ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements a hazard recognizer using the instruction itineraries +// defined for the current target. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "exact-hazards" +#include "ExactHazardRecognizer.h" +#include "llvm/CodeGen/ScheduleHazardRecognizer.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrItineraries.h" + +using namespace llvm; + +ExactHazardRecognizer::ExactHazardRecognizer(const InstrItineraryData &LItinData) : + ScheduleHazardRecognizer(), ItinData(LItinData) +{ + // Determine the maximum depth of any itinerary. This determines the + // depth of the scoreboard. We always make the scoreboard at least 1 + // cycle deep to avoid dealing with the boundary condition. + ScoreboardDepth = 1; + if (!ItinData.isEmpty()) { + for (unsigned idx = 0; ; ++idx) { + if (ItinData.isEndMarker(idx)) + break; + + const InstrStage *IS = ItinData.beginStage(idx); + const InstrStage *E = ItinData.endStage(idx); + unsigned ItinDepth = 0; + for (; IS != E; ++IS) + ItinDepth += IS->getCycles(); + + ScoreboardDepth = std::max(ScoreboardDepth, ItinDepth); + } + } + + Scoreboard = new unsigned[ScoreboardDepth]; + ScoreboardHead = 0; + + DEBUG(errs() << "Using exact hazard recognizer: ScoreboardDepth = " + << ScoreboardDepth << '\n'); +} + +ExactHazardRecognizer::~ExactHazardRecognizer() { + delete [] Scoreboard; +} + +void ExactHazardRecognizer::Reset() { + memset(Scoreboard, 0, ScoreboardDepth * sizeof(unsigned)); + ScoreboardHead = 0; +} + +unsigned ExactHazardRecognizer::getFutureIndex(unsigned offset) { + return (ScoreboardHead + offset) % ScoreboardDepth; +} + +void ExactHazardRecognizer::dumpScoreboard() { + errs() << "Scoreboard:\n"; + + unsigned last = ScoreboardDepth - 1; + while ((last > 0) && (Scoreboard[getFutureIndex(last)] == 0)) + last--; + + for (unsigned i = 0; i <= last; i++) { + unsigned FUs = Scoreboard[getFutureIndex(i)]; + errs() << "\t"; + for (int j = 31; j >= 0; j--) + errs() << ((FUs & (1 << j)) ? 
'1' : '0'); + errs() << '\n'; + } +} + +ExactHazardRecognizer::HazardType ExactHazardRecognizer::getHazardType(SUnit *SU) { + if (ItinData.isEmpty()) + return NoHazard; + + unsigned cycle = 0; + + // Use the itinerary for the underlying instruction to check for + // free FU's in the scoreboard at the appropriate future cycles. + unsigned idx = SU->getInstr()->getDesc().getSchedClass(); + for (const InstrStage *IS = ItinData.beginStage(idx), + *E = ItinData.endStage(idx); IS != E; ++IS) { + // We must find one of the stage's units free for every cycle the + // stage is occupied. FIXME it would be more accurate to find the + // same unit free in all the cycles. + for (unsigned int i = 0; i < IS->getCycles(); ++i) { + assert(((cycle + i) < ScoreboardDepth) && + "Scoreboard depth exceeded!"); + + unsigned index = getFutureIndex(cycle + i); + unsigned freeUnits = IS->getUnits() & ~Scoreboard[index]; + if (!freeUnits) { + DEBUG(errs() << "*** Hazard in cycle " << (cycle + i) << ", "); + DEBUG(errs() << "SU(" << SU->NodeNum << "): "); + DEBUG(SU->getInstr()->dump()); + return Hazard; + } + } + + // Advance the cycle to the next stage. + cycle += IS->getNextCycles(); + } + + return NoHazard; +} + +void ExactHazardRecognizer::EmitInstruction(SUnit *SU) { + if (ItinData.isEmpty()) + return; + + unsigned cycle = 0; + + // Use the itinerary for the underlying instruction to reserve FU's + // in the scoreboard at the appropriate future cycles. + unsigned idx = SU->getInstr()->getDesc().getSchedClass(); + for (const InstrStage *IS = ItinData.beginStage(idx), + *E = ItinData.endStage(idx); IS != E; ++IS) { + // We must reserve one of the stage's units for every cycle the + // stage is occupied. FIXME it would be more accurate to reserve + // the same unit free in all the cycles. 
+ for (unsigned int i = 0; i < IS->getCycles(); ++i) { + assert(((cycle + i) < ScoreboardDepth) && + "Scoreboard depth exceeded!"); + + unsigned index = getFutureIndex(cycle + i); + unsigned freeUnits = IS->getUnits() & ~Scoreboard[index]; + + // reduce to a single unit + unsigned freeUnit = 0; + do { + freeUnit = freeUnits; + freeUnits = freeUnit & (freeUnit - 1); + } while (freeUnits); + + assert(freeUnit && "No function unit available!"); + Scoreboard[index] |= freeUnit; + } + + // Advance the cycle to the next stage. + cycle += IS->getNextCycles(); + } + + DEBUG(dumpScoreboard()); +} + +void ExactHazardRecognizer::AdvanceCycle() { + Scoreboard[ScoreboardHead] = 0; + ScoreboardHead = getFutureIndex(1); +} diff --git a/lib/CodeGen/ExactHazardRecognizer.h b/lib/CodeGen/ExactHazardRecognizer.h new file mode 100644 index 0000000..71ac979 --- /dev/null +++ b/lib/CodeGen/ExactHazardRecognizer.h @@ -0,0 +1,61 @@ +//=- llvm/CodeGen/ExactHazardRecognizer.h - Scheduling Support -*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the ExactHazardRecognizer class, which +// implements hazard-avoidance heuristics for scheduling, based on the +// scheduling itineraries specified for the target. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_EXACTHAZARDRECOGNIZER_H +#define LLVM_CODEGEN_EXACTHAZARDRECOGNIZER_H + +#include "llvm/CodeGen/ScheduleHazardRecognizer.h" +#include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/Target/TargetInstrItineraries.h" + +namespace llvm { + class ExactHazardRecognizer : public ScheduleHazardRecognizer { + // Itinerary data for the target. + const InstrItineraryData &ItinData; + + // Scoreboard to track function unit usage. 
Scoreboard[0] is a + // mask of the FUs in use in the cycle currently being + // schedule. Scoreboard[1] is a mask for the next cycle. The + // Scoreboard is used as a circular buffer with the current cycle + // indicated by ScoreboardHead. + unsigned *Scoreboard; + + // The maximum number of cycles monitored by the Scoreboard. This + // value is determined based on the target itineraries to ensure + // that all hazards can be tracked. + unsigned ScoreboardDepth; + + // Indices into the Scoreboard that represent the current cycle. + unsigned ScoreboardHead; + + // Return the scoreboard index to use for 'offset' cycles in the + // future. 'offset' of 0 returns ScoreboardHead. + unsigned getFutureIndex(unsigned offset); + + // Print the scoreboard. + void dumpScoreboard(); + + public: + ExactHazardRecognizer(const InstrItineraryData &ItinData); + ~ExactHazardRecognizer(); + + virtual HazardType getHazardType(SUnit *SU); + virtual void Reset(); + virtual void EmitInstruction(SUnit *SU); + virtual void AdvanceCycle(); + }; +} + +#endif diff --git a/lib/CodeGen/GCMetadata.cpp b/lib/CodeGen/GCMetadata.cpp index cf2ebb3..a57296c 100644 --- a/lib/CodeGen/GCMetadata.cpp +++ b/lib/CodeGen/GCMetadata.cpp @@ -18,17 +18,20 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/Function.h" #include "llvm/Support/Compiler.h" - +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; namespace { class VISIBILITY_HIDDEN Printer : public FunctionPass { static char ID; - std::ostream &OS; + raw_ostream &OS; public: - explicit Printer(std::ostream &OS = *cerr); + Printer() : FunctionPass(&ID), OS(errs()) {} + explicit Printer(raw_ostream &OS) : FunctionPass(&ID), OS(OS) {} + const char *getPassName() const; void getAnalysisUsage(AnalysisUsage &AU) const; @@ -74,27 +77,24 @@ GCModuleInfo::~GCModuleInfo() { GCStrategy *GCModuleInfo::getOrCreateStrategy(const Module *M, const std::string &Name) { - const char *Start = Name.c_str(); - - 
strategy_map_type::iterator NMI = - StrategyMap.find(Start, Start + Name.size()); + strategy_map_type::iterator NMI = StrategyMap.find(Name); if (NMI != StrategyMap.end()) return NMI->getValue(); for (GCRegistry::iterator I = GCRegistry::begin(), E = GCRegistry::end(); I != E; ++I) { - if (strcmp(Start, I->getName()) == 0) { + if (Name == I->getName()) { GCStrategy *S = I->instantiate(); S->M = M; S->Name = Name; - StrategyMap.GetOrCreateValue(Start, Start + Name.size()).setValue(S); + StrategyMap.GetOrCreateValue(Name).setValue(S); StrategyList.push_back(S); return S; } } - - cerr << "unsupported GC: " << Name << "\n"; - abort(); + + errs() << "unsupported GC: " << Name << "\n"; + llvm_unreachable(0); } GCFunctionInfo &GCModuleInfo::getFunctionInfo(const Function &F) { @@ -124,12 +124,10 @@ void GCModuleInfo::clear() { char Printer::ID = 0; -FunctionPass *llvm::createGCInfoPrinter(std::ostream &OS) { +FunctionPass *llvm::createGCInfoPrinter(raw_ostream &OS) { return new Printer(OS); } -Printer::Printer(std::ostream &OS) - : FunctionPass(&ID), OS(OS) {} const char *Printer::getPassName() const { return "Print Garbage Collector Information"; @@ -143,7 +141,7 @@ void Printer::getAnalysisUsage(AnalysisUsage &AU) const { static const char *DescKind(GC::PointKind Kind) { switch (Kind) { - default: assert(0 && "Unknown GC point kind"); + default: llvm_unreachable("Unknown GC point kind"); case GC::Loop: return "loop"; case GC::Return: return "return"; case GC::PreCall: return "pre-call"; @@ -155,12 +153,12 @@ bool Printer::runOnFunction(Function &F) { if (!F.hasGC()) { GCFunctionInfo *FD = &getAnalysis<GCModuleInfo>().getFunctionInfo(F); - OS << "GC roots for " << FD->getFunction().getNameStart() << ":\n"; + OS << "GC roots for " << FD->getFunction().getNameStr() << ":\n"; for (GCFunctionInfo::roots_iterator RI = FD->roots_begin(), RE = FD->roots_end(); RI != RE; ++RI) OS << "\t" << RI->Num << "\t" << RI->StackOffset << "[sp]\n"; - OS << "GC safe points for " << 
FD->getFunction().getNameStart() << ":\n"; + OS << "GC safe points for " << FD->getFunction().getNameStr() << ":\n"; for (GCFunctionInfo::iterator PI = FD->begin(), PE = FD->end(); PI != PE; ++PI) { diff --git a/lib/CodeGen/GCMetadataPrinter.cpp b/lib/CodeGen/GCMetadataPrinter.cpp index 5a5ef84..9cd2925 100644 --- a/lib/CodeGen/GCMetadataPrinter.cpp +++ b/lib/CodeGen/GCMetadataPrinter.cpp @@ -20,11 +20,11 @@ GCMetadataPrinter::GCMetadataPrinter() { } GCMetadataPrinter::~GCMetadataPrinter() { } void GCMetadataPrinter::beginAssembly(raw_ostream &OS, AsmPrinter &AP, - const TargetAsmInfo &TAI) { + const MCAsmInfo &MAI) { // Default is no action. } void GCMetadataPrinter::finishAssembly(raw_ostream &OS, AsmPrinter &AP, - const TargetAsmInfo &TAI) { + const MCAsmInfo &MAI) { // Default is no action. } diff --git a/lib/CodeGen/GCStrategy.cpp b/lib/CodeGen/GCStrategy.cpp index ad7421a..6d0de41 100644 --- a/lib/CodeGen/GCStrategy.cpp +++ b/lib/CodeGen/GCStrategy.cpp @@ -28,6 +28,8 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -70,7 +72,8 @@ namespace { void FindSafePoints(MachineFunction &MF); void VisitCallPoint(MachineBasicBlock::iterator MI); unsigned InsertLabel(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI) const; + MachineBasicBlock::iterator MI, + DebugLoc DL) const; void FindStackOffsets(MachineFunction &MF); @@ -107,8 +110,8 @@ GCStrategy::~GCStrategy() { bool GCStrategy::initializeCustomLowering(Module &M) { return false; } bool GCStrategy::performCustomLowering(Function &F) { - cerr << "gc " << getName() << " must override performCustomLowering.\n"; - abort(); + errs() << "gc " << getName() << " must override performCustomLowering.\n"; + llvm_unreachable(0); return 0; } @@ -327,11 +330,13 @@ void MachineCodeAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { } unsigned 
MachineCodeAnalysis::InsertLabel(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI) const { + MachineBasicBlock::iterator MI, + DebugLoc DL) const { unsigned Label = MMI->NextLabelID(); - // N.B. we assume that MI is *not* equal to the "end()" iterator. - BuildMI(MBB, MI, MI->getDebugLoc(), + + BuildMI(MBB, MI, DL, TII->get(TargetInstrInfo::GC_LABEL)).addImm(Label); + return Label; } @@ -342,10 +347,12 @@ void MachineCodeAnalysis::VisitCallPoint(MachineBasicBlock::iterator CI) { ++RAI; if (FI->getStrategy().needsSafePoint(GC::PreCall)) - FI->addSafePoint(GC::PreCall, InsertLabel(*CI->getParent(), CI)); + FI->addSafePoint(GC::PreCall, InsertLabel(*CI->getParent(), CI, + CI->getDebugLoc())); if (FI->getStrategy().needsSafePoint(GC::PostCall)) - FI->addSafePoint(GC::PostCall, InsertLabel(*CI->getParent(), RAI)); + FI->addSafePoint(GC::PostCall, InsertLabel(*CI->getParent(), RAI, + CI->getDebugLoc())); } void MachineCodeAnalysis::FindSafePoints(MachineFunction &MF) { diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp index d5e7ea5..7b613ff 100644 --- a/lib/CodeGen/IfConversion.cpp +++ b/lib/CodeGen/IfConversion.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "ifcvt" +#include "BranchFolding.h" #include "llvm/Function.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/MachineModuleInfo.h" @@ -21,6 +22,8 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" @@ -226,14 +229,14 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { TII = MF.getTarget().getInstrInfo(); if (!TII) return false; - DOUT << "\nIfcvt: function (" << ++FnNum << ") \'" - << MF.getFunction()->getName() << "\'"; + DEBUG(errs() << "\nIfcvt: 
function (" << ++FnNum << ") \'" + << MF.getFunction()->getName() << "\'"); if (FnNum < IfCvtFnStart || (IfCvtFnStop != -1 && FnNum > IfCvtFnStop)) { - DOUT << " skipped\n"; + DEBUG(errs() << " skipped\n"); return false; } - DOUT << "\n"; + DEBUG(errs() << "\n"); MF.RenumberBlocks(); BBAnalysis.resize(MF.getNumBlockIDs()); @@ -278,13 +281,13 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { case ICSimpleFalse: { bool isFalse = Kind == ICSimpleFalse; if ((isFalse && DisableSimpleF) || (!isFalse && DisableSimple)) break; - DOUT << "Ifcvt (Simple" << (Kind == ICSimpleFalse ? " false" :"") - << "): BB#" << BBI.BB->getNumber() << " (" - << ((Kind == ICSimpleFalse) - ? BBI.FalseBB->getNumber() - : BBI.TrueBB->getNumber()) << ") "; + DEBUG(errs() << "Ifcvt (Simple" << (Kind == ICSimpleFalse ? " false" :"") + << "): BB#" << BBI.BB->getNumber() << " (" + << ((Kind == ICSimpleFalse) + ? BBI.FalseBB->getNumber() + : BBI.TrueBB->getNumber()) << ") "); RetVal = IfConvertSimple(BBI, Kind); - DOUT << (RetVal ? "succeeded!" : "failed!") << "\n"; + DEBUG(errs() << (RetVal ? "succeeded!" : "failed!") << "\n"); if (RetVal) { if (isFalse) NumSimpleFalse++; else NumSimple++; @@ -301,16 +304,16 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { if (DisableTriangleR && !isFalse && isRev) break; if (DisableTriangleF && isFalse && !isRev) break; if (DisableTriangleFR && isFalse && isRev) break; - DOUT << "Ifcvt (Triangle"; + DEBUG(errs() << "Ifcvt (Triangle"); if (isFalse) - DOUT << " false"; + DEBUG(errs() << " false"); if (isRev) - DOUT << " rev"; - DOUT << "): BB#" << BBI.BB->getNumber() << " (T:" - << BBI.TrueBB->getNumber() << ",F:" - << BBI.FalseBB->getNumber() << ") "; + DEBUG(errs() << " rev"); + DEBUG(errs() << "): BB#" << BBI.BB->getNumber() << " (T:" + << BBI.TrueBB->getNumber() << ",F:" + << BBI.FalseBB->getNumber() << ") "); RetVal = IfConvertTriangle(BBI, Kind); - DOUT << (RetVal ? "succeeded!" : "failed!") << "\n"; + DEBUG(errs() << (RetVal ? 
"succeeded!" : "failed!") << "\n"); if (RetVal) { if (isFalse) { if (isRev) NumTriangleFRev++; @@ -324,11 +327,11 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { } case ICDiamond: { if (DisableDiamond) break; - DOUT << "Ifcvt (Diamond): BB#" << BBI.BB->getNumber() << " (T:" - << BBI.TrueBB->getNumber() << ",F:" - << BBI.FalseBB->getNumber() << ") "; + DEBUG(errs() << "Ifcvt (Diamond): BB#" << BBI.BB->getNumber() << " (T:" + << BBI.TrueBB->getNumber() << ",F:" + << BBI.FalseBB->getNumber() << ") "); RetVal = IfConvertDiamond(BBI, Kind, NumDups, NumDups2); - DOUT << (RetVal ? "succeeded!" : "failed!") << "\n"; + DEBUG(errs() << (RetVal ? "succeeded!" : "failed!") << "\n"); if (RetVal) NumDiamonds++; break; } @@ -358,6 +361,13 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { Roots.clear(); BBAnalysis.clear(); + if (MadeChange) { + BranchFolder BF(false); + BF.OptimizeFunction(MF, TII, + MF.getTarget().getRegisterInfo(), + getAnalysisIfAvailable<MachineModuleInfo>()); + } + return MadeChange; } @@ -1130,8 +1140,10 @@ void IfConverter::PredicateBlock(BBInfo &BBI, if (TII->isPredicated(I)) continue; if (!TII->PredicateInstruction(I, Cond)) { - cerr << "Unable to predicate " << *I << "!\n"; - abort(); +#ifndef NDEBUG + errs() << "Unable to predicate " << *I << "!\n"; +#endif + llvm_unreachable(0); } } @@ -1164,8 +1176,10 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, if (!isPredicated) if (!TII->PredicateInstruction(MI, Cond)) { - cerr << "Unable to predicate " << *MI << "!\n"; - abort(); +#ifndef NDEBUG + errs() << "Unable to predicate " << *MI << "!\n"; +#endif + llvm_unreachable(0); } } diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp index 052334a..3e3b28a 100644 --- a/lib/CodeGen/IntrinsicLowering.cpp +++ b/lib/CodeGen/IntrinsicLowering.cpp @@ -16,7 +16,9 @@ #include "llvm/Module.h" #include "llvm/Type.h" #include "llvm/CodeGen/IntrinsicLowering.h" +#include 
"llvm/Support/ErrorHandling.h" #include "llvm/Support/IRBuilder.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetData.h" #include "llvm/ADT/SmallVector.h" using namespace llvm; @@ -39,11 +41,11 @@ static void EnsureFPIntrinsicsExist(Module &M, Function *Fn, switch((int)Fn->arg_begin()->getType()->getTypeID()) { case Type::FloatTyID: EnsureFunctionExists(M, FName, Fn->arg_begin(), Fn->arg_end(), - Type::FloatTy); + Type::getFloatTy(M.getContext())); break; case Type::DoubleTyID: EnsureFunctionExists(M, DName, Fn->arg_begin(), Fn->arg_end(), - Type::DoubleTy); + Type::getDoubleTy(M.getContext())); break; case Type::X86_FP80TyID: case Type::FP128TyID: @@ -82,39 +84,43 @@ static CallInst *ReplaceCallWith(const char *NewFn, CallInst *CI, } void IntrinsicLowering::AddPrototypes(Module &M) { + LLVMContext &Context = M.getContext(); for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) if (I->isDeclaration() && !I->use_empty()) switch (I->getIntrinsicID()) { default: break; case Intrinsic::setjmp: EnsureFunctionExists(M, "setjmp", I->arg_begin(), I->arg_end(), - Type::Int32Ty); + Type::getInt32Ty(M.getContext())); break; case Intrinsic::longjmp: EnsureFunctionExists(M, "longjmp", I->arg_begin(), I->arg_end(), - Type::VoidTy); + Type::getVoidTy(M.getContext())); break; case Intrinsic::siglongjmp: EnsureFunctionExists(M, "abort", I->arg_end(), I->arg_end(), - Type::VoidTy); + Type::getVoidTy(M.getContext())); break; case Intrinsic::memcpy: - M.getOrInsertFunction("memcpy", PointerType::getUnqual(Type::Int8Ty), - PointerType::getUnqual(Type::Int8Ty), - PointerType::getUnqual(Type::Int8Ty), - TD.getIntPtrType(), (Type *)0); + M.getOrInsertFunction("memcpy", + Type::getInt8PtrTy(Context), + Type::getInt8PtrTy(Context), + Type::getInt8PtrTy(Context), + TD.getIntPtrType(Context), (Type *)0); break; case Intrinsic::memmove: - M.getOrInsertFunction("memmove", PointerType::getUnqual(Type::Int8Ty), - PointerType::getUnqual(Type::Int8Ty), - 
PointerType::getUnqual(Type::Int8Ty), - TD.getIntPtrType(), (Type *)0); + M.getOrInsertFunction("memmove", + Type::getInt8PtrTy(Context), + Type::getInt8PtrTy(Context), + Type::getInt8PtrTy(Context), + TD.getIntPtrType(Context), (Type *)0); break; case Intrinsic::memset: - M.getOrInsertFunction("memset", PointerType::getUnqual(Type::Int8Ty), - PointerType::getUnqual(Type::Int8Ty), - Type::Int32Ty, - TD.getIntPtrType(), (Type *)0); + M.getOrInsertFunction("memset", + Type::getInt8PtrTy(Context), + Type::getInt8PtrTy(Context), + Type::getInt32Ty(M.getContext()), + TD.getIntPtrType(Context), (Type *)0); break; case Intrinsic::sqrt: EnsureFPIntrinsicsExist(M, I, "sqrtf", "sqrt", "sqrtl"); @@ -148,7 +154,7 @@ void IntrinsicLowering::AddPrototypes(Module &M) { /// LowerBSWAP - Emit the code to lower bswap of V before the specified /// instruction IP. -static Value *LowerBSWAP(Value *V, Instruction *IP) { +static Value *LowerBSWAP(LLVMContext &Context, Value *V, Instruction *IP) { assert(V->getType()->isInteger() && "Can't bswap a non-integer type!"); unsigned BitSize = V->getType()->getPrimitiveSizeInBits(); @@ -156,7 +162,7 @@ static Value *LowerBSWAP(Value *V, Instruction *IP) { IRBuilder<> Builder(IP->getParent(), IP); switch(BitSize) { - default: assert(0 && "Unhandled type size of value to byteswap!"); + default: llvm_unreachable("Unhandled type size of value to byteswap!"); case 16: { Value *Tmp1 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 8), "bswap.2"); @@ -172,11 +178,13 @@ static Value *LowerBSWAP(Value *V, Instruction *IP) { "bswap.3"); Value *Tmp2 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 8), "bswap.2"); - Value *Tmp1 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 24), + Value *Tmp1 = Builder.CreateLShr(V,ConstantInt::get(V->getType(), 24), "bswap.1"); - Tmp3 = Builder.CreateAnd(Tmp3, ConstantInt::get(Type::Int32Ty, 0xFF0000), + Tmp3 = Builder.CreateAnd(Tmp3, + ConstantInt::get(Type::getInt32Ty(Context), 0xFF0000), 
"bswap.and3"); - Tmp2 = Builder.CreateAnd(Tmp2, ConstantInt::get(Type::Int32Ty, 0xFF00), + Tmp2 = Builder.CreateAnd(Tmp2, + ConstantInt::get(Type::getInt32Ty(Context), 0xFF00), "bswap.and2"); Tmp4 = Builder.CreateOr(Tmp4, Tmp3, "bswap.or1"); Tmp2 = Builder.CreateOr(Tmp2, Tmp1, "bswap.or2"); @@ -194,31 +202,38 @@ static Value *LowerBSWAP(Value *V, Instruction *IP) { "bswap.5"); Value* Tmp4 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 8), "bswap.4"); - Value* Tmp3 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 24), + Value* Tmp3 = Builder.CreateLShr(V, + ConstantInt::get(V->getType(), 24), "bswap.3"); - Value* Tmp2 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 40), + Value* Tmp2 = Builder.CreateLShr(V, + ConstantInt::get(V->getType(), 40), "bswap.2"); - Value* Tmp1 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 56), + Value* Tmp1 = Builder.CreateLShr(V, + ConstantInt::get(V->getType(), 56), "bswap.1"); Tmp7 = Builder.CreateAnd(Tmp7, - ConstantInt::get(Type::Int64Ty, + ConstantInt::get(Type::getInt64Ty(Context), 0xFF000000000000ULL), "bswap.and7"); Tmp6 = Builder.CreateAnd(Tmp6, - ConstantInt::get(Type::Int64Ty, + ConstantInt::get(Type::getInt64Ty(Context), 0xFF0000000000ULL), "bswap.and6"); Tmp5 = Builder.CreateAnd(Tmp5, - ConstantInt::get(Type::Int64Ty, 0xFF00000000ULL), + ConstantInt::get(Type::getInt64Ty(Context), + 0xFF00000000ULL), "bswap.and5"); Tmp4 = Builder.CreateAnd(Tmp4, - ConstantInt::get(Type::Int64Ty, 0xFF000000ULL), + ConstantInt::get(Type::getInt64Ty(Context), + 0xFF000000ULL), "bswap.and4"); Tmp3 = Builder.CreateAnd(Tmp3, - ConstantInt::get(Type::Int64Ty, 0xFF0000ULL), + ConstantInt::get(Type::getInt64Ty(Context), + 0xFF0000ULL), "bswap.and3"); Tmp2 = Builder.CreateAnd(Tmp2, - ConstantInt::get(Type::Int64Ty, 0xFF00ULL), + ConstantInt::get(Type::getInt64Ty(Context), + 0xFF00ULL), "bswap.and2"); Tmp8 = Builder.CreateOr(Tmp8, Tmp7, "bswap.or1"); Tmp6 = Builder.CreateOr(Tmp6, Tmp5, "bswap.or2"); @@ -235,7 +250,7 @@ 
static Value *LowerBSWAP(Value *V, Instruction *IP) { /// LowerCTPOP - Emit the code to lower ctpop of V before the specified /// instruction IP. -static Value *LowerCTPOP(Value *V, Instruction *IP) { +static Value *LowerCTPOP(LLVMContext &Context, Value *V, Instruction *IP) { assert(V->getType()->isInteger() && "Can't ctpop a non-integer type!"); static const uint64_t MaskValues[6] = { @@ -257,7 +272,7 @@ static Value *LowerCTPOP(Value *V, Instruction *IP) { Value *MaskCst = ConstantInt::get(V->getType(), MaskValues[ct]); Value *LHS = Builder.CreateAnd(PartValue, MaskCst, "cppop.and1"); Value *VShift = Builder.CreateLShr(PartValue, - ConstantInt::get(V->getType(), i), + ConstantInt::get(V->getType(), i), "ctpop.sh"); Value *RHS = Builder.CreateAnd(VShift, MaskCst, "cppop.and2"); PartValue = Builder.CreateAdd(LHS, RHS, "ctpop.step"); @@ -275,7 +290,7 @@ static Value *LowerCTPOP(Value *V, Instruction *IP) { /// LowerCTLZ - Emit the code to lower ctlz of V before the specified /// instruction IP. -static Value *LowerCTLZ(Value *V, Instruction *IP) { +static Value *LowerCTLZ(LLVMContext &Context, Value *V, Instruction *IP) { IRBuilder<> Builder(IP->getParent(), IP); @@ -287,353 +302,21 @@ static Value *LowerCTLZ(Value *V, Instruction *IP) { } V = Builder.CreateNot(V); - return LowerCTPOP(V, IP); -} - -/// Convert the llvm.part.select.iX.iY intrinsic. This intrinsic takes -/// three integer arguments. The first argument is the Value from which the -/// bits will be selected. It may be of any bit width. The second and third -/// arguments specify a range of bits to select with the second argument -/// specifying the low bit and the third argument specifying the high bit. Both -/// must be type i32. The result is the corresponding selected bits from the -/// Value in the same width as the Value (first argument). If the low bit index -/// is higher than the high bit index then the inverse selection is done and -/// the bits are returned in inverse order. 
-/// @brief Lowering of llvm.part.select intrinsic. -static Instruction *LowerPartSelect(CallInst *CI) { - IRBuilder<> Builder; - - // Make sure we're dealing with a part select intrinsic here - Function *F = CI->getCalledFunction(); - const FunctionType *FT = F->getFunctionType(); - if (!F->isDeclaration() || !FT->getReturnType()->isInteger() || - FT->getNumParams() != 3 || !FT->getParamType(0)->isInteger() || - !FT->getParamType(1)->isInteger() || !FT->getParamType(2)->isInteger()) - return CI; - - // Get the intrinsic implementation function by converting all the . to _ - // in the intrinsic's function name and then reconstructing the function - // declaration. - std::string Name(F->getName()); - for (unsigned i = 4; i < Name.length(); ++i) - if (Name[i] == '.') - Name[i] = '_'; - Module* M = F->getParent(); - F = cast<Function>(M->getOrInsertFunction(Name, FT)); - F->setLinkage(GlobalValue::WeakAnyLinkage); - - // If we haven't defined the impl function yet, do so now - if (F->isDeclaration()) { - - // Get the arguments to the function - Function::arg_iterator args = F->arg_begin(); - Value* Val = args++; Val->setName("Val"); - Value* Lo = args++; Lo->setName("Lo"); - Value* Hi = args++; Hi->setName("High"); - - // We want to select a range of bits here such that [Hi, Lo] is shifted - // down to the low bits. However, it is quite possible that Hi is smaller - // than Lo in which case the bits have to be reversed. 
- - // Create the blocks we will need for the two cases (forward, reverse) - BasicBlock* CurBB = BasicBlock::Create("entry", F); - BasicBlock *RevSize = BasicBlock::Create("revsize", CurBB->getParent()); - BasicBlock *FwdSize = BasicBlock::Create("fwdsize", CurBB->getParent()); - BasicBlock *Compute = BasicBlock::Create("compute", CurBB->getParent()); - BasicBlock *Reverse = BasicBlock::Create("reverse", CurBB->getParent()); - BasicBlock *RsltBlk = BasicBlock::Create("result", CurBB->getParent()); - - Builder.SetInsertPoint(CurBB); - - // Cast Hi and Lo to the size of Val so the widths are all the same - if (Hi->getType() != Val->getType()) - Hi = Builder.CreateIntCast(Hi, Val->getType(), /* isSigned */ false, - "tmp"); - if (Lo->getType() != Val->getType()) - Lo = Builder.CreateIntCast(Lo, Val->getType(), /* isSigned */ false, - "tmp"); - - // Compute a few things that both cases will need, up front. - Constant* Zero = ConstantInt::get(Val->getType(), 0); - Constant* One = ConstantInt::get(Val->getType(), 1); - Constant* AllOnes = ConstantInt::getAllOnesValue(Val->getType()); - - // Compare the Hi and Lo bit positions. This is used to determine - // which case we have (forward or reverse) - Value *Cmp = Builder.CreateICmpULT(Hi, Lo, "less"); - Builder.CreateCondBr(Cmp, RevSize, FwdSize); - - // First, compute the number of bits in the forward case. - Builder.SetInsertPoint(FwdSize); - Value* FBitSize = Builder.CreateSub(Hi, Lo, "fbits"); - Builder.CreateBr(Compute); - - // Second, compute the number of bits in the reverse case. - Builder.SetInsertPoint(RevSize); - Value* RBitSize = Builder.CreateSub(Lo, Hi, "rbits"); - Builder.CreateBr(Compute); - - // Now, compute the bit range. Start by getting the bitsize and the shift - // amount (either Hi or Lo) from PHI nodes. Then we compute a mask for - // the number of bits we want in the range. 
We shift the bits down to the - // least significant bits, apply the mask to zero out unwanted high bits, - // and we have computed the "forward" result. It may still need to be - // reversed. - Builder.SetInsertPoint(Compute); - - // Get the BitSize from one of the two subtractions - PHINode *BitSize = Builder.CreatePHI(Val->getType(), "bits"); - BitSize->reserveOperandSpace(2); - BitSize->addIncoming(FBitSize, FwdSize); - BitSize->addIncoming(RBitSize, RevSize); - - // Get the ShiftAmount as the smaller of Hi/Lo - PHINode *ShiftAmt = Builder.CreatePHI(Val->getType(), "shiftamt"); - ShiftAmt->reserveOperandSpace(2); - ShiftAmt->addIncoming(Lo, FwdSize); - ShiftAmt->addIncoming(Hi, RevSize); - - // Increment the bit size - Value *BitSizePlusOne = Builder.CreateAdd(BitSize, One, "bits"); - - // Create a Mask to zero out the high order bits. - Value* Mask = Builder.CreateShl(AllOnes, BitSizePlusOne, "mask"); - Mask = Builder.CreateNot(Mask, "mask"); - - // Shift the bits down and apply the mask - Value* FRes = Builder.CreateLShr(Val, ShiftAmt, "fres"); - FRes = Builder.CreateAnd(FRes, Mask, "fres"); - Builder.CreateCondBr(Cmp, Reverse, RsltBlk); - - // In the Reverse block we have the mask already in FRes but we must reverse - // it by shifting FRes bits right and putting them in RRes by shifting them - // in from left. - Builder.SetInsertPoint(Reverse); - - // First set up our loop counters - PHINode *Count = Builder.CreatePHI(Val->getType(), "count"); - Count->reserveOperandSpace(2); - Count->addIncoming(BitSizePlusOne, Compute); - - // Next, get the value that we are shifting. 
- PHINode *BitsToShift = Builder.CreatePHI(Val->getType(), "val"); - BitsToShift->reserveOperandSpace(2); - BitsToShift->addIncoming(FRes, Compute); - - // Finally, get the result of the last computation - PHINode *RRes = Builder.CreatePHI(Val->getType(), "rres"); - RRes->reserveOperandSpace(2); - RRes->addIncoming(Zero, Compute); - - // Decrement the counter - Value *Decr = Builder.CreateSub(Count, One, "decr"); - Count->addIncoming(Decr, Reverse); - - // Compute the Bit that we want to move - Value *Bit = Builder.CreateAnd(BitsToShift, One, "bit"); - - // Compute the new value for next iteration. - Value *NewVal = Builder.CreateLShr(BitsToShift, One, "rshift"); - BitsToShift->addIncoming(NewVal, Reverse); - - // Shift the bit into the low bits of the result. - Value *NewRes = Builder.CreateShl(RRes, One, "lshift"); - NewRes = Builder.CreateOr(NewRes, Bit, "addbit"); - RRes->addIncoming(NewRes, Reverse); - - // Terminate loop if we've moved all the bits. - Value *Cond = Builder.CreateICmpEQ(Decr, Zero, "cond"); - Builder.CreateCondBr(Cond, RsltBlk, Reverse); - - // Finally, in the result block, select one of the two results with a PHI - // node and return the result; - Builder.SetInsertPoint(RsltBlk); - PHINode *BitSelect = Builder.CreatePHI(Val->getType(), "part_select"); - BitSelect->reserveOperandSpace(2); - BitSelect->addIncoming(FRes, Compute); - BitSelect->addIncoming(NewRes, Reverse); - Builder.CreateRet(BitSelect); - } - - // Return a call to the implementation function - Builder.SetInsertPoint(CI->getParent(), CI); - CallInst *NewCI = Builder.CreateCall3(F, CI->getOperand(1), - CI->getOperand(2), CI->getOperand(3)); - NewCI->setName(CI->getName()); - return NewCI; -} - -/// Convert the llvm.part.set.iX.iY.iZ intrinsic. This intrinsic takes -/// four integer arguments (iAny %Value, iAny %Replacement, i32 %Low, i32 %High) -/// The first two arguments can be any bit width. The result is the same width -/// as %Value. 
The operation replaces bits between %Low and %High with the value -/// in %Replacement. If %Replacement is not the same width, it is truncated or -/// zero extended as appropriate to fit the bits being replaced. If %Low is -/// greater than %High then the inverse set of bits are replaced. -/// @brief Lowering of llvm.bit.part.set intrinsic. -static Instruction *LowerPartSet(CallInst *CI) { - IRBuilder<> Builder; - - // Make sure we're dealing with a part select intrinsic here - Function *F = CI->getCalledFunction(); - const FunctionType *FT = F->getFunctionType(); - if (!F->isDeclaration() || !FT->getReturnType()->isInteger() || - FT->getNumParams() != 4 || !FT->getParamType(0)->isInteger() || - !FT->getParamType(1)->isInteger() || !FT->getParamType(2)->isInteger() || - !FT->getParamType(3)->isInteger()) - return CI; - - // Get the intrinsic implementation function by converting all the . to _ - // in the intrinsic's function name and then reconstructing the function - // declaration. - std::string Name(F->getName()); - for (unsigned i = 4; i < Name.length(); ++i) - if (Name[i] == '.') - Name[i] = '_'; - Module* M = F->getParent(); - F = cast<Function>(M->getOrInsertFunction(Name, FT)); - F->setLinkage(GlobalValue::WeakAnyLinkage); - - // If we haven't defined the impl function yet, do so now - if (F->isDeclaration()) { - // Get the arguments for the function. 
- Function::arg_iterator args = F->arg_begin(); - Value* Val = args++; Val->setName("Val"); - Value* Rep = args++; Rep->setName("Rep"); - Value* Lo = args++; Lo->setName("Lo"); - Value* Hi = args++; Hi->setName("Hi"); - - // Get some types we need - const IntegerType* ValTy = cast<IntegerType>(Val->getType()); - const IntegerType* RepTy = cast<IntegerType>(Rep->getType()); - uint32_t RepBits = RepTy->getBitWidth(); - - // Constant Definitions - ConstantInt* RepBitWidth = ConstantInt::get(Type::Int32Ty, RepBits); - ConstantInt* RepMask = ConstantInt::getAllOnesValue(RepTy); - ConstantInt* ValMask = ConstantInt::getAllOnesValue(ValTy); - ConstantInt* One = ConstantInt::get(Type::Int32Ty, 1); - ConstantInt* ValOne = ConstantInt::get(ValTy, 1); - ConstantInt* Zero = ConstantInt::get(Type::Int32Ty, 0); - ConstantInt* ValZero = ConstantInt::get(ValTy, 0); - - // Basic blocks we fill in below. - BasicBlock* entry = BasicBlock::Create("entry", F, 0); - BasicBlock* large = BasicBlock::Create("large", F, 0); - BasicBlock* small = BasicBlock::Create("small", F, 0); - BasicBlock* reverse = BasicBlock::Create("reverse", F, 0); - BasicBlock* result = BasicBlock::Create("result", F, 0); - - // BASIC BLOCK: entry - Builder.SetInsertPoint(entry); - // First, get the number of bits that we're placing as an i32 - Value* is_forward = Builder.CreateICmpULT(Lo, Hi); - Value* Hi_pn = Builder.CreateSelect(is_forward, Hi, Lo); - Value* Lo_pn = Builder.CreateSelect(is_forward, Lo, Hi); - Value* NumBits = Builder.CreateSub(Hi_pn, Lo_pn); - NumBits = Builder.CreateAdd(NumBits, One); - // Now, convert Lo and Hi to ValTy bit width - Lo = Builder.CreateIntCast(Lo_pn, ValTy, /* isSigned */ false); - // Determine if the replacement bits are larger than the number of bits we - // are replacing and deal with it. 
- Value* is_large = Builder.CreateICmpULT(NumBits, RepBitWidth); - Builder.CreateCondBr(is_large, large, small); - - // BASIC BLOCK: large - Builder.SetInsertPoint(large); - Value* MaskBits = Builder.CreateSub(RepBitWidth, NumBits); - MaskBits = Builder.CreateIntCast(MaskBits, RepMask->getType(), - /* isSigned */ false); - Value* Mask1 = Builder.CreateLShr(RepMask, MaskBits); - Value* Rep2 = Builder.CreateAnd(Mask1, Rep); - Builder.CreateBr(small); - - // BASIC BLOCK: small - Builder.SetInsertPoint(small); - PHINode* Rep3 = Builder.CreatePHI(RepTy); - Rep3->reserveOperandSpace(2); - Rep3->addIncoming(Rep2, large); - Rep3->addIncoming(Rep, entry); - Value* Rep4 = Builder.CreateIntCast(Rep3, ValTy, /* isSigned */ false); - Builder.CreateCondBr(is_forward, result, reverse); - - // BASIC BLOCK: reverse (reverses the bits of the replacement) - Builder.SetInsertPoint(reverse); - // Set up our loop counter as a PHI so we can decrement on each iteration. - // We will loop for the number of bits in the replacement value. - PHINode *Count = Builder.CreatePHI(Type::Int32Ty, "count"); - Count->reserveOperandSpace(2); - Count->addIncoming(NumBits, small); - - // Get the value that we are shifting bits out of as a PHI because - // we'll change this with each iteration. - PHINode *BitsToShift = Builder.CreatePHI(Val->getType(), "val"); - BitsToShift->reserveOperandSpace(2); - BitsToShift->addIncoming(Rep4, small); - - // Get the result of the last computation or zero on first iteration - PHINode *RRes = Builder.CreatePHI(Val->getType(), "rres"); - RRes->reserveOperandSpace(2); - RRes->addIncoming(ValZero, small); - - // Decrement the loop counter by one - Value *Decr = Builder.CreateSub(Count, One); - Count->addIncoming(Decr, reverse); - - // Get the bit that we want to move into the result - Value *Bit = Builder.CreateAnd(BitsToShift, ValOne); - - // Compute the new value of the bits to shift for the next iteration. 
- Value *NewVal = Builder.CreateLShr(BitsToShift, ValOne); - BitsToShift->addIncoming(NewVal, reverse); - - // Shift the bit we extracted into the low bit of the result. - Value *NewRes = Builder.CreateShl(RRes, ValOne); - NewRes = Builder.CreateOr(NewRes, Bit); - RRes->addIncoming(NewRes, reverse); - - // Terminate loop if we've moved all the bits. - Value *Cond = Builder.CreateICmpEQ(Decr, Zero); - Builder.CreateCondBr(Cond, result, reverse); - - // BASIC BLOCK: result - Builder.SetInsertPoint(result); - PHINode *Rplcmnt = Builder.CreatePHI(Val->getType()); - Rplcmnt->reserveOperandSpace(2); - Rplcmnt->addIncoming(NewRes, reverse); - Rplcmnt->addIncoming(Rep4, small); - Value* t0 = Builder.CreateIntCast(NumBits, ValTy, /* isSigned */ false); - Value* t1 = Builder.CreateShl(ValMask, Lo); - Value* t2 = Builder.CreateNot(t1); - Value* t3 = Builder.CreateShl(t1, t0); - Value* t4 = Builder.CreateOr(t2, t3); - Value* t5 = Builder.CreateAnd(t4, Val); - Value* t6 = Builder.CreateShl(Rplcmnt, Lo); - Value* Rslt = Builder.CreateOr(t5, t6, "part_set"); - Builder.CreateRet(Rslt); - } - - // Return a call to the implementation function - Builder.SetInsertPoint(CI->getParent(), CI); - CallInst *NewCI = Builder.CreateCall4(F, CI->getOperand(1), - CI->getOperand(2), CI->getOperand(3), - CI->getOperand(4)); - NewCI->setName(CI->getName()); - return NewCI; + return LowerCTPOP(Context, V, IP); } static void ReplaceFPIntrinsicWithCall(CallInst *CI, const char *Fname, const char *Dname, const char *LDname) { switch (CI->getOperand(1)->getType()->getTypeID()) { - default: assert(0 && "Invalid type in intrinsic"); abort(); + default: llvm_unreachable("Invalid type in intrinsic"); case Type::FloatTyID: ReplaceCallWith(Fname, CI, CI->op_begin() + 1, CI->op_end(), - Type::FloatTy); + Type::getFloatTy(CI->getContext())); break; case Type::DoubleTyID: ReplaceCallWith(Dname, CI, CI->op_begin() + 1, CI->op_end(), - Type::DoubleTy); + Type::getDoubleTy(CI->getContext())); break; case 
Type::X86_FP80TyID: case Type::FP128TyID: @@ -646,19 +329,18 @@ static void ReplaceFPIntrinsicWithCall(CallInst *CI, const char *Fname, void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { IRBuilder<> Builder(CI->getParent(), CI); + LLVMContext &Context = CI->getContext(); Function *Callee = CI->getCalledFunction(); assert(Callee && "Cannot lower an indirect call!"); switch (Callee->getIntrinsicID()) { case Intrinsic::not_intrinsic: - cerr << "Cannot lower a call to a non-intrinsic function '" - << Callee->getName() << "'!\n"; - abort(); + llvm_report_error("Cannot lower a call to a non-intrinsic function '"+ + Callee->getName() + "'!"); default: - cerr << "Error: Code generator does not support intrinsic function '" - << Callee->getName() << "'!\n"; - abort(); + llvm_report_error("Code generator does not support intrinsic function '"+ + Callee->getName()+"'!"); // The setjmp/longjmp intrinsics should only exist in the code if it was // never optimized (ie, right out of the CFE), or if it has been hacked on @@ -666,38 +348,38 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { // convert the call to an explicit setjmp or longjmp call. 
case Intrinsic::setjmp: { Value *V = ReplaceCallWith("setjmp", CI, CI->op_begin() + 1, CI->op_end(), - Type::Int32Ty); - if (CI->getType() != Type::VoidTy) + Type::getInt32Ty(Context)); + if (CI->getType() != Type::getVoidTy(Context)) CI->replaceAllUsesWith(V); break; } case Intrinsic::sigsetjmp: - if (CI->getType() != Type::VoidTy) + if (CI->getType() != Type::getVoidTy(Context)) CI->replaceAllUsesWith(Constant::getNullValue(CI->getType())); break; case Intrinsic::longjmp: { ReplaceCallWith("longjmp", CI, CI->op_begin() + 1, CI->op_end(), - Type::VoidTy); + Type::getVoidTy(Context)); break; } case Intrinsic::siglongjmp: { // Insert the call to abort ReplaceCallWith("abort", CI, CI->op_end(), CI->op_end(), - Type::VoidTy); + Type::getVoidTy(Context)); break; } case Intrinsic::ctpop: - CI->replaceAllUsesWith(LowerCTPOP(CI->getOperand(1), CI)); + CI->replaceAllUsesWith(LowerCTPOP(Context, CI->getOperand(1), CI)); break; case Intrinsic::bswap: - CI->replaceAllUsesWith(LowerBSWAP(CI->getOperand(1), CI)); + CI->replaceAllUsesWith(LowerBSWAP(Context, CI->getOperand(1), CI)); break; case Intrinsic::ctlz: - CI->replaceAllUsesWith(LowerCTLZ(CI->getOperand(1), CI)); + CI->replaceAllUsesWith(LowerCTLZ(Context, CI->getOperand(1), CI)); break; case Intrinsic::cttz: { @@ -707,24 +389,16 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { NotSrc->setName(Src->getName() + ".not"); Value *SrcM1 = ConstantInt::get(Src->getType(), 1); SrcM1 = Builder.CreateSub(Src, SrcM1); - Src = LowerCTPOP(Builder.CreateAnd(NotSrc, SrcM1), CI); + Src = LowerCTPOP(Context, Builder.CreateAnd(NotSrc, SrcM1), CI); CI->replaceAllUsesWith(Src); break; } - case Intrinsic::part_select: - CI->replaceAllUsesWith(LowerPartSelect(CI)); - break; - - case Intrinsic::part_set: - CI->replaceAllUsesWith(LowerPartSet(CI)); - break; - case Intrinsic::stacksave: case Intrinsic::stackrestore: { if (!Warned) - cerr << "WARNING: this target does not support the llvm.stack" - << (Callee->getIntrinsicID() == 
Intrinsic::stacksave ? + errs() << "WARNING: this target does not support the llvm.stack" + << (Callee->getIntrinsicID() == Intrinsic::stacksave ? "save" : "restore") << " intrinsic.\n"; Warned = true; if (Callee->getIntrinsicID() == Intrinsic::stacksave) @@ -734,8 +408,8 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { case Intrinsic::returnaddress: case Intrinsic::frameaddress: - cerr << "WARNING: this target does not support the llvm." - << (Callee->getIntrinsicID() == Intrinsic::returnaddress ? + errs() << "WARNING: this target does not support the llvm." + << (Callee->getIntrinsicID() == Intrinsic::returnaddress ? "return" : "frame") << "address intrinsic.\n"; CI->replaceAllUsesWith(ConstantPointerNull::get( cast<PointerType>(CI->getType()))); @@ -747,9 +421,9 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { case Intrinsic::pcmarker: break; // Simply strip out pcmarker on unsupported architectures case Intrinsic::readcyclecounter: { - cerr << "WARNING: this target does not support the llvm.readcyclecoun" - << "ter intrinsic. It is being lowered to a constant 0\n"; - CI->replaceAllUsesWith(ConstantInt::get(Type::Int64Ty, 0)); + errs() << "WARNING: this target does not support the llvm.readcyclecoun" + << "ter intrinsic. It is being lowered to a constant 0\n"; + CI->replaceAllUsesWith(ConstantInt::get(Type::getInt64Ty(Context), 0)); break; } @@ -761,13 +435,11 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { break; // Simply strip out debugging intrinsics case Intrinsic::eh_exception: - case Intrinsic::eh_selector_i32: - case Intrinsic::eh_selector_i64: + case Intrinsic::eh_selector: CI->replaceAllUsesWith(Constant::getNullValue(CI->getType())); break; - case Intrinsic::eh_typeid_for_i32: - case Intrinsic::eh_typeid_for_i64: + case Intrinsic::eh_typeid_for: // Return something different to eh_selector. 
CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 1)); break; @@ -776,7 +448,7 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { break; // Strip out annotate intrinsic case Intrinsic::memcpy: { - const IntegerType *IntPtr = TD.getIntPtrType(); + const IntegerType *IntPtr = TD.getIntPtrType(Context); Value *Size = Builder.CreateIntCast(CI->getOperand(3), IntPtr, /* isSigned */ false); Value *Ops[3]; @@ -787,7 +459,7 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { break; } case Intrinsic::memmove: { - const IntegerType *IntPtr = TD.getIntPtrType(); + const IntegerType *IntPtr = TD.getIntPtrType(Context); Value *Size = Builder.CreateIntCast(CI->getOperand(3), IntPtr, /* isSigned */ false); Value *Ops[3]; @@ -798,13 +470,13 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { break; } case Intrinsic::memset: { - const IntegerType *IntPtr = TD.getIntPtrType(); + const IntegerType *IntPtr = TD.getIntPtrType(Context); Value *Size = Builder.CreateIntCast(CI->getOperand(3), IntPtr, /* isSigned */ false); Value *Ops[3]; Ops[0] = CI->getOperand(1); // Extend the amount to i32. 
- Ops[1] = Builder.CreateIntCast(CI->getOperand(2), Type::Int32Ty, + Ops[1] = Builder.CreateIntCast(CI->getOperand(2), Type::getInt32Ty(Context), /* isSigned */ false); Ops[2] = Size; ReplaceCallWith("memset", CI, Ops, Ops+3, CI->getOperand(1)->getType()); @@ -840,7 +512,7 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { } case Intrinsic::flt_rounds: // Lower to "round to the nearest" - if (CI->getType() != Type::VoidTy) + if (CI->getType() != Type::getVoidTy(Context)) CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 1)); break; } diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index a163cac..4e713a6 100644 --- a/lib/CodeGen/LLVMTargetMachine.cpp +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -15,14 +15,16 @@ #include "llvm/PassManager.h" #include "llvm/Pass.h" #include "llvm/Assembly/PrintModulePass.h" -#include "llvm/Analysis/LoopPass.h" +#include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/GCStrategy.h" +#include "llvm/CodeGen/MachineFunctionAnalysis.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/Target/TargetRegistry.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/FormattedStream.h" using namespace llvm; namespace llvm { @@ -37,26 +39,31 @@ static cl::opt<bool> PrintEmittedAsm("print-emitted-asm", cl::Hidden, cl::desc("Dump emitter generated instructions as assembly")); static cl::opt<bool> PrintGCInfo("print-gc", cl::Hidden, cl::desc("Dump garbage collector data")); +static cl::opt<bool> HoistConstants("hoist-constants", cl::Hidden, + cl::desc("Hoist constants out of loops")); static cl::opt<bool> VerifyMachineCode("verify-machineinstrs", cl::Hidden, cl::desc("Verify generated machine code"), cl::init(getenv("LLVM_VERIFY_MACHINEINSTRS")!=NULL)); -// When this works it will be on by default. 
-static cl::opt<bool> -DisablePostRAScheduler("disable-post-RA-scheduler", - cl::desc("Disable scheduling after register allocation"), - cl::init(true)); - // Enable or disable FastISel. Both options are needed, because // FastISel is enabled by default with -fast, and we wish to be -// able to enable or disable fast-isel independently from -fast. +// able to enable or disable fast-isel independently from -O0. static cl::opt<cl::boolOrDefault> EnableFastISelOption("fast-isel", cl::Hidden, - cl::desc("Enable the experimental \"fast\" instruction selector")); + cl::desc("Enable the \"fast\" instruction selector")); + + +LLVMTargetMachine::LLVMTargetMachine(const Target &T, + const std::string &TargetTriple) + : TargetMachine(T) { + AsmInfo = T.createAsmInfo(TargetTriple); +} + + FileModel::Model LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, - raw_ostream &Out, + formatted_raw_ostream &Out, CodeGenFileType FileType, CodeGenOpt::Level OptLevel) { // Add common CodeGen passes. @@ -67,10 +74,10 @@ LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, PM.add(createDebugLabelFoldingPass()); if (PrintMachineCode) - PM.add(createMachineFunctionPrinterPass(cerr)); + PM.add(createMachineFunctionPrinterPass(errs())); if (addPreEmitPass(PM, OptLevel) && PrintMachineCode) - PM.add(createMachineFunctionPrinterPass(cerr)); + PM.add(createMachineFunctionPrinterPass(errs())); if (OptLevel != CodeGenOpt::None) PM.add(createCodePlacementOptPass()); @@ -92,6 +99,19 @@ LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, return FileModel::Error; } +bool LLVMTargetMachine::addAssemblyEmitter(PassManagerBase &PM, + CodeGenOpt::Level OptLevel, + bool Verbose, + formatted_raw_ostream &Out) { + FunctionPass *Printer = + getTarget().createAsmPrinter(Out, *this, getMCAsmInfo(), Verbose); + if (!Printer) + return true; + + PM.add(Printer); + return false; +} + /// addPassesToEmitFileFinish - If the passes to emit the specified file had to /// be split up (e.g., to add 
an object writer pass), this method can be used to /// finish up adding passes to emit the file, if necessary. @@ -99,13 +119,12 @@ bool LLVMTargetMachine::addPassesToEmitFileFinish(PassManagerBase &PM, MachineCodeEmitter *MCE, CodeGenOpt::Level OptLevel) { if (MCE) - addSimpleCodeEmitter(PM, OptLevel, PrintEmittedAsm, *MCE); + addSimpleCodeEmitter(PM, OptLevel, *MCE); + if (PrintEmittedAsm) + addAssemblyEmitter(PM, OptLevel, true, ferrs()); PM.add(createGCInfoDeleter()); - // Delete machine code for this function - PM.add(createMachineCodeDeleter()); - return false; // success! } @@ -116,12 +135,27 @@ bool LLVMTargetMachine::addPassesToEmitFileFinish(PassManagerBase &PM, JITCodeEmitter *JCE, CodeGenOpt::Level OptLevel) { if (JCE) - addSimpleCodeEmitter(PM, OptLevel, PrintEmittedAsm, *JCE); + addSimpleCodeEmitter(PM, OptLevel, *JCE); + if (PrintEmittedAsm) + addAssemblyEmitter(PM, OptLevel, true, ferrs()); PM.add(createGCInfoDeleter()); - // Delete machine code for this function - PM.add(createMachineCodeDeleter()); + return false; // success! +} + +/// addPassesToEmitFileFinish - If the passes to emit the specified file had to +/// be split up (e.g., to add an object writer pass), this method can be used to +/// finish up adding passes to emit the file, if necessary. +bool LLVMTargetMachine::addPassesToEmitFileFinish(PassManagerBase &PM, + ObjectCodeEmitter *OCE, + CodeGenOpt::Level OptLevel) { + if (OCE) + addSimpleCodeEmitter(PM, OptLevel, *OCE); + if (PrintEmittedAsm) + addAssemblyEmitter(PM, OptLevel, true, ferrs()); + + PM.add(createGCInfoDeleter()); return false; // success! 
} @@ -140,15 +174,14 @@ bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM, return true; if (addPreEmitPass(PM, OptLevel) && PrintMachineCode) - PM.add(createMachineFunctionPrinterPass(cerr)); + PM.add(createMachineFunctionPrinterPass(errs())); - addCodeEmitter(PM, OptLevel, PrintEmittedAsm, MCE); + addCodeEmitter(PM, OptLevel, MCE); + if (PrintEmittedAsm) + addAssemblyEmitter(PM, OptLevel, true, ferrs()); PM.add(createGCInfoDeleter()); - // Delete machine code for this function - PM.add(createMachineCodeDeleter()); - return false; // success! } @@ -166,22 +199,21 @@ bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM, return true; if (addPreEmitPass(PM, OptLevel) && PrintMachineCode) - PM.add(createMachineFunctionPrinterPass(cerr)); + PM.add(createMachineFunctionPrinterPass(errs())); - addCodeEmitter(PM, OptLevel, PrintEmittedAsm, JCE); + addCodeEmitter(PM, OptLevel, JCE); + if (PrintEmittedAsm) + addAssemblyEmitter(PM, OptLevel, true, ferrs()); PM.add(createGCInfoDeleter()); - // Delete machine code for this function - PM.add(createMachineCodeDeleter()); - return false; // success! } static void printAndVerify(PassManagerBase &PM, bool allowDoubleDefs = false) { if (PrintMachineCode) - PM.add(createMachineFunctionPrinterPass(cerr)); + PM.add(createMachineFunctionPrinterPass(errs())); if (VerifyMachineCode) PM.add(createMachineVerifierPass(allowDoubleDefs)); @@ -203,18 +235,31 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, // Turn exception handling constructs into something the code generators can // handle. - if (!getTargetAsmInfo()->doesSupportExceptionHandling()) - PM.add(createLowerInvokePass(getTargetLowering())); - else + switch (getMCAsmInfo()->getExceptionHandlingType()) + { + case ExceptionHandling::SjLj: + // SjLj piggy-backs on dwarf for this bit. 
The cleanups done apply to both PM.add(createDwarfEHPass(getTargetLowering(), OptLevel==CodeGenOpt::None)); + PM.add(createSjLjEHPass(getTargetLowering())); + break; + case ExceptionHandling::Dwarf: + PM.add(createDwarfEHPass(getTargetLowering(), OptLevel==CodeGenOpt::None)); + break; + case ExceptionHandling::None: + PM.add(createLowerInvokePass(getTargetLowering())); + break; + } PM.add(createGCLoweringPass()); // Make sure that no unreachable blocks are instruction selected. PM.add(createUnreachableBlockEliminationPass()); - if (OptLevel != CodeGenOpt::None) + if (OptLevel != CodeGenOpt::None) { + if (HoistConstants) + PM.add(createCodeGenLICMPass()); PM.add(createCodeGenPreparePass(getTargetLowering())); + } PM.add(createStackProtectorPass(getTargetLowering())); @@ -225,6 +270,9 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, // Standard Lower-Level Passes. + // Set up a MachineFunction for the rest of CodeGen to work on. + PM.add(new MachineFunctionAnalysis(*this, OptLevel)); + // Enable FastISel with -fast, but allow that to be overridden. if (EnableFastISelOption == cl::BOU_TRUE || (OptLevel == CodeGenOpt::None && EnableFastISelOption != cl::BOU_FALSE)) @@ -240,19 +288,21 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, if (OptLevel != CodeGenOpt::None) { PM.add(createMachineLICMPass()); PM.add(createMachineSinkingPass()); - printAndVerify(PM, /* allowDoubleDefs= */ false); + printAndVerify(PM, /* allowDoubleDefs= */ true); } // Run pre-ra passes. if (addPreRegAlloc(PM, OptLevel)) - printAndVerify(PM); + printAndVerify(PM, /* allowDoubleDefs= */ true); // Perform register allocation. PM.add(createRegisterAllocator()); // Perform stack slot coloring. if (OptLevel != CodeGenOpt::None) - PM.add(createStackSlotColoringPass(OptLevel >= CodeGenOpt::Aggressive)); + // FIXME: Re-enable coloring with register when it's capable of adding + // kill markers. 
+ PM.add(createStackSlotColoringPass(false)); printAndVerify(PM); // Print the register-allocated code @@ -267,8 +317,12 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, PM.add(createPrologEpilogCodeInserter()); printAndVerify(PM); + // Run pre-sched2 passes. + if (addPreSched2(PM, OptLevel)) + printAndVerify(PM); + // Second pass scheduler. - if (OptLevel != CodeGenOpt::None && !DisablePostRAScheduler) { + if (OptLevel != CodeGenOpt::None) { PM.add(createPostRAScheduler()); printAndVerify(PM); } @@ -283,7 +337,7 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, printAndVerify(PM); if (PrintGCInfo) - PM.add(createGCInfoPrinter(*cerr)); + PM.add(createGCInfoPrinter(errs())); return false; } diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp index 26722a3..a02a4a6 100644 --- a/lib/CodeGen/LiveInterval.cpp +++ b/lib/CodeGen/LiveInterval.cpp @@ -23,12 +23,16 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/Support/Streams.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetRegisterInfo.h" #include <algorithm> -#include <ostream> using namespace llvm; +// Print a LiveIndex to a raw_ostream. +void LiveIndex::print(raw_ostream &os) const { + os << (index & ~PHI_BIT); +} + // An example for liveAt(): // // this = [1,4), liveAt(0) will return false. The instruction defining this @@ -36,7 +40,7 @@ using namespace llvm; // variable it represents. This is because slot 1 is used (def slot) and spans // up to slot 3 (store slot). // -bool LiveInterval::liveAt(unsigned I) const { +bool LiveInterval::liveAt(LiveIndex I) const { Ranges::const_iterator r = std::upper_bound(ranges.begin(), ranges.end(), I); if (r == ranges.begin()) @@ -49,7 +53,7 @@ bool LiveInterval::liveAt(unsigned I) const { // liveBeforeAndAt - Check if the interval is live at the index and the index // just before it. 
If index is liveAt, check if it starts a new live range. // If it does, then check if the previous live range ends at index-1. -bool LiveInterval::liveBeforeAndAt(unsigned I) const { +bool LiveInterval::liveBeforeAndAt(LiveIndex I) const { Ranges::const_iterator r = std::upper_bound(ranges.begin(), ranges.end(), I); if (r == ranges.begin()) @@ -127,7 +131,7 @@ bool LiveInterval::overlapsFrom(const LiveInterval& other, /// overlaps - Return true if the live interval overlaps a range specified /// by [Start, End). -bool LiveInterval::overlaps(unsigned Start, unsigned End) const { +bool LiveInterval::overlaps(LiveIndex Start, LiveIndex End) const { assert(Start < End && "Invalid range"); const_iterator I = begin(); const_iterator E = end(); @@ -145,10 +149,10 @@ bool LiveInterval::overlaps(unsigned Start, unsigned End) const { /// specified by I to end at the specified endpoint. To do this, we should /// merge and eliminate all ranges that this will overlap with. The iterator is /// not invalidated. -void LiveInterval::extendIntervalEndTo(Ranges::iterator I, unsigned NewEnd) { +void LiveInterval::extendIntervalEndTo(Ranges::iterator I, LiveIndex NewEnd) { assert(I != ranges.end() && "Not a valid interval!"); VNInfo *ValNo = I->valno; - unsigned OldEnd = I->end; + LiveIndex OldEnd = I->end; // Search for the first interval that we can't merge with. Ranges::iterator MergeTo = next(I); @@ -163,7 +167,7 @@ void LiveInterval::extendIntervalEndTo(Ranges::iterator I, unsigned NewEnd) { ranges.erase(next(I), MergeTo); // Update kill info. - removeKills(ValNo, OldEnd, I->end-1); + ValNo->removeKills(OldEnd, I->end.prevSlot_()); // If the newly formed range now touches the range after it and if they have // the same value number, merge the two ranges into one range. @@ -179,7 +183,7 @@ void LiveInterval::extendIntervalEndTo(Ranges::iterator I, unsigned NewEnd) { /// specified by I to start at the specified endpoint. 
To do this, we should /// merge and eliminate all ranges that this will overlap with. LiveInterval::Ranges::iterator -LiveInterval::extendIntervalStartTo(Ranges::iterator I, unsigned NewStart) { +LiveInterval::extendIntervalStartTo(Ranges::iterator I, LiveIndex NewStart) { assert(I != ranges.end() && "Not a valid interval!"); VNInfo *ValNo = I->valno; @@ -212,7 +216,7 @@ LiveInterval::extendIntervalStartTo(Ranges::iterator I, unsigned NewStart) { LiveInterval::iterator LiveInterval::addRangeFrom(LiveRange LR, iterator From) { - unsigned Start = LR.start, End = LR.end; + LiveIndex Start = LR.start, End = LR.end; iterator it = std::upper_bound(From, ranges.end(), Start); // If the inserted interval starts in the middle or right at the end of @@ -246,7 +250,7 @@ LiveInterval::addRangeFrom(LiveRange LR, iterator From) { extendIntervalEndTo(it, End); else if (End < it->end) // Overlapping intervals, there might have been a kill here. - removeKill(it->valno, End); + it->valno->removeKill(End); return it; } } else { @@ -262,33 +266,32 @@ LiveInterval::addRangeFrom(LiveRange LR, iterator From) { return ranges.insert(it, LR); } -/// isInOneLiveRange - Return true if the range specified is entirely in the +/// isInOneLiveRange - Return true if the range specified is entirely in /// a single LiveRange of the live interval. -bool LiveInterval::isInOneLiveRange(unsigned Start, unsigned End) { +bool LiveInterval::isInOneLiveRange(LiveIndex Start, LiveIndex End) { Ranges::iterator I = std::upper_bound(ranges.begin(), ranges.end(), Start); if (I == ranges.begin()) return false; --I; - return I->contains(Start) && I->contains(End-1); + return I->containsRange(Start, End); } /// removeRange - Remove the specified range from this interval. Note that /// the range must be in a single LiveRange in its entirety. 
-void LiveInterval::removeRange(unsigned Start, unsigned End, +void LiveInterval::removeRange(LiveIndex Start, LiveIndex End, bool RemoveDeadValNo) { // Find the LiveRange containing this span. Ranges::iterator I = std::upper_bound(ranges.begin(), ranges.end(), Start); assert(I != ranges.begin() && "Range is not in interval!"); --I; - assert(I->contains(Start) && I->contains(End-1) && - "Range is not entirely in interval!"); + assert(I->containsRange(Start, End) && "Range is not entirely in interval!"); // If the span we are removing is at the start of the LiveRange, adjust it. VNInfo *ValNo = I->valno; if (I->start == Start) { if (I->end == End) { - removeKills(I->valno, Start, End); + ValNo->removeKills(Start, End); if (RemoveDeadValNo) { // Check if val# is dead. bool isDead = true; @@ -322,13 +325,13 @@ void LiveInterval::removeRange(unsigned Start, unsigned End, // Otherwise if the span we are removing is at the end of the LiveRange, // adjust the other way. if (I->end == End) { - removeKills(ValNo, Start, End); + ValNo->removeKills(Start, End); I->end = Start; return; } // Otherwise, we are splitting the LiveRange into two pieces. - unsigned OldEnd = I->end; + LiveIndex OldEnd = I->end; I->end = Start; // Trim the old interval. // Insert the new one. @@ -362,11 +365,12 @@ void LiveInterval::removeValNo(VNInfo *ValNo) { /// scaleNumbering - Renumber VNI and ranges to provide gaps for new /// instructions. + void LiveInterval::scaleNumbering(unsigned factor) { // Scale ranges. for (iterator RI = begin(), RE = end(); RI != RE; ++RI) { - RI->start = InstrSlots::scale(RI->start, factor); - RI->end = InstrSlots::scale(RI->end, factor); + RI->start = RI->start.scale(factor); + RI->end = RI->end.scale(factor); } // Scale VNI info. 
@@ -374,19 +378,20 @@ void LiveInterval::scaleNumbering(unsigned factor) { VNInfo *vni = *VNI; if (vni->isDefAccurate()) - vni->def = InstrSlots::scale(vni->def, factor); + vni->def = vni->def.scale(factor); for (unsigned i = 0; i < vni->kills.size(); ++i) { - if (vni->kills[i] != 0) - vni->kills[i] = InstrSlots::scale(vni->kills[i], factor); + if (!vni->kills[i].isPHIIndex()) + vni->kills[i] = vni->kills[i].scale(factor); } } } + /// getLiveRangeContaining - Return the live range that contains the /// specified index, or null if there is none. LiveInterval::const_iterator -LiveInterval::FindLiveRangeContaining(unsigned Idx) const { +LiveInterval::FindLiveRangeContaining(LiveIndex Idx) const { const_iterator It = std::upper_bound(begin(), end(), Idx); if (It != ranges.begin()) { --It; @@ -398,7 +403,7 @@ LiveInterval::FindLiveRangeContaining(unsigned Idx) const { } LiveInterval::iterator -LiveInterval::FindLiveRangeContaining(unsigned Idx) { +LiveInterval::FindLiveRangeContaining(LiveIndex Idx) { iterator It = std::upper_bound(begin(), end(), Idx); if (It != begin()) { --It; @@ -409,17 +414,27 @@ LiveInterval::FindLiveRangeContaining(unsigned Idx) { return end(); } -/// findDefinedVNInfo - Find the VNInfo that's defined at the specified index -/// (register interval) or defined by the specified register (stack inteval). -VNInfo *LiveInterval::findDefinedVNInfo(unsigned DefIdxOrReg) const { - VNInfo *VNI = NULL; +/// findDefinedVNInfo - Find the VNInfo defined by the specified +/// index (register interval). +VNInfo *LiveInterval::findDefinedVNInfoForRegInt(LiveIndex Idx) const { for (LiveInterval::const_vni_iterator i = vni_begin(), e = vni_end(); - i != e; ++i) - if ((*i)->def == DefIdxOrReg) { - VNI = *i; - break; - } - return VNI; + i != e; ++i) { + if ((*i)->def == Idx) + return *i; + } + + return 0; +} + +/// findDefinedVNInfo - Find the VNInfo defined by the specified +/// register (stack inteval). 
+VNInfo *LiveInterval::findDefinedVNInfoForStackInt(unsigned reg) const { + for (LiveInterval::const_vni_iterator i = vni_begin(), e = vni_end(); + i != e; ++i) { + if ((*i)->getReg() == reg) + return *i; + } + return 0; } /// join - Join two live intervals (this, and other) together. This applies @@ -502,7 +517,7 @@ void LiveInterval::join(LiveInterval &Other, const int *LHSValNoAssignments, InsertPos = addRangeFrom(*I, InsertPos); } - weight += Other.weight; + ComputeJoinedWeight(Other); // Update regalloc hint if currently there isn't one. if (TargetRegisterInfo::isVirtualRegister(reg) && @@ -546,7 +561,7 @@ void LiveInterval::MergeValueInAsValue(const LiveInterval &RHS, for (const_iterator I = RHS.begin(), E = RHS.end(); I != E; ++I) { if (I->valno != RHSValNo) continue; - unsigned Start = I->start, End = I->end; + LiveIndex Start = I->start, End = I->end; IP = std::upper_bound(IP, end(), Start); // If the start of this range overlaps with an existing liverange, trim it. if (IP != begin() && IP[-1].end > Start) { @@ -622,20 +637,21 @@ void LiveInterval::MergeInClobberRanges(const LiveInterval &Clobbers, else if (UnusedValNo) ClobberValNo = UnusedValNo; else { - UnusedValNo = ClobberValNo = getNextValue(0, 0, false, VNInfoAllocator); + UnusedValNo = ClobberValNo = + getNextValue(LiveIndex(), 0, false, VNInfoAllocator); ValNoMaps.insert(std::make_pair(I->valno, ClobberValNo)); } bool Done = false; - unsigned Start = I->start, End = I->end; + LiveIndex Start = I->start, End = I->end; // If a clobber range starts before an existing range and ends after // it, the clobber range will need to be split into multiple ranges. // Loop until the entire clobber range is handled. while (!Done) { Done = true; IP = std::upper_bound(IP, end(), Start); - unsigned SubRangeStart = Start; - unsigned SubRangeEnd = End; + LiveIndex SubRangeStart = Start; + LiveIndex SubRangeEnd = End; // If the start of this range overlaps with an existing liverange, trim it. 
if (IP != begin() && IP[-1].end > SubRangeStart) { @@ -671,11 +687,13 @@ void LiveInterval::MergeInClobberRanges(const LiveInterval &Clobbers, } } -void LiveInterval::MergeInClobberRange(unsigned Start, unsigned End, +void LiveInterval::MergeInClobberRange(LiveIndex Start, + LiveIndex End, BumpPtrAllocator &VNInfoAllocator) { // Find a value # to use for the clobber ranges. If there is already a value# // for unknown values, use it. - VNInfo *ClobberValNo = getNextValue(0, 0, false, VNInfoAllocator); + VNInfo *ClobberValNo = + getNextValue(LiveIndex(), 0, false, VNInfoAllocator); iterator IP = begin(); IP = std::upper_bound(IP, end(), Start); @@ -711,7 +729,7 @@ VNInfo* LiveInterval::MergeValueNumberInto(VNInfo *V1, VNInfo *V2) { // Make sure V2 is smaller than V1. if (V1->id < V2->id) { - copyValNumInfo(V1, V2); + V1->copyFrom(*V2); std::swap(V1, V2); } @@ -788,20 +806,42 @@ void LiveInterval::Copy(const LiveInterval &RHS, unsigned LiveInterval::getSize() const { unsigned Sum = 0; for (const_iterator I = begin(), E = end(); I != E; ++I) - Sum += I->end - I->start; + Sum += I->start.distance(I->end); return Sum; } -std::ostream& llvm::operator<<(std::ostream& os, const LiveRange &LR) { +/// ComputeJoinedWeight - Set the weight of a live interval Joined +/// after Other has been merged into it. +void LiveInterval::ComputeJoinedWeight(const LiveInterval &Other) { + // If either of these intervals was spilled, the weight is the + // weight of the non-spilled interval. This can only happen with + // iterative coalescers. 
+ + if (Other.weight != HUGE_VALF) { + weight += Other.weight; + } + else if (weight == HUGE_VALF && + !TargetRegisterInfo::isPhysicalRegister(reg)) { + // Remove this assert if you have an iterative coalescer + assert(0 && "Joining to spilled interval"); + weight = Other.weight; + } + else { + // Otherwise the weight stays the same + // Remove this assert if you have an iterative coalescer + assert(0 && "Joining from spilled interval"); + } +} + +raw_ostream& llvm::operator<<(raw_ostream& os, const LiveRange &LR) { return os << '[' << LR.start << ',' << LR.end << ':' << LR.valno->id << ")"; } void LiveRange::dump() const { - cerr << *this << "\n"; + errs() << *this << "\n"; } -void LiveInterval::print(std::ostream &OS, - const TargetRegisterInfo *TRI) const { +void LiveInterval::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const { if (isStackSlot()) OS << "SS#" << getStackSlotIndex(); else if (TRI && TargetRegisterInfo::isPhysicalRegister(reg)) @@ -841,6 +881,8 @@ void LiveInterval::print(std::ostream &OS, OS << "-("; for (unsigned j = 0; j != ee; ++j) { OS << vni->kills[j]; + if (vni->kills[j].isPHIIndex()) + OS << "*"; if (j != ee-1) OS << " "; } @@ -857,10 +899,10 @@ void LiveInterval::print(std::ostream &OS, } void LiveInterval::dump() const { - cerr << *this << "\n"; + errs() << *this << "\n"; } -void LiveRange::print(std::ostream &os) const { +void LiveRange::print(raw_ostream &os) const { os << *this; } diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index 52a30bc..93d3d4c 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -25,6 +25,7 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/PseudoSourceValue.h" @@ -34,6 +35,8 @@ #include 
"llvm/Target/TargetOptions.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" @@ -47,24 +50,24 @@ using namespace llvm; static cl::opt<bool> DisableReMat("disable-rematerialization", cl::init(false), cl::Hidden); -static cl::opt<bool> SplitAtBB("split-intervals-at-bb", - cl::init(true), cl::Hidden); -static cl::opt<int> SplitLimit("split-limit", - cl::init(-1), cl::Hidden); - -static cl::opt<bool> EnableAggressiveRemat("aggressive-remat", cl::Hidden); - static cl::opt<bool> EnableFastSpilling("fast-spill", cl::init(false), cl::Hidden); -STATISTIC(numIntervals, "Number of original intervals"); -STATISTIC(numFolds , "Number of loads/stores folded into instructions"); -STATISTIC(numSplits , "Number of intervals split"); +static cl::opt<bool> EarlyCoalescing("early-coalescing", cl::init(false)); + +static cl::opt<int> CoalescingLimit("early-coalescing-limit", + cl::init(-1), cl::Hidden); + +STATISTIC(numIntervals , "Number of original intervals"); +STATISTIC(numFolds , "Number of loads/stores folded into instructions"); +STATISTIC(numSplits , "Number of intervals split"); +STATISTIC(numCoalescing, "Number of early coalescing performed"); char LiveIntervals::ID = 0; static RegisterPass<LiveIntervals> X("liveintervals", "Live Interval Analysis"); void LiveIntervals::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); AU.addRequired<AliasAnalysis>(); AU.addPreserved<AliasAnalysis>(); AU.addPreserved<LiveVariables>(); @@ -92,15 +95,32 @@ void LiveIntervals::releaseMemory() { mi2iMap_.clear(); i2miMap_.clear(); r2iMap_.clear(); + terminatorGaps.clear(); + phiJoinCopies.clear(); + // Release VNInfo memroy regions after all VNInfo objects are dtor'd. 
VNInfoAllocator.Reset(); - while (!ClonedMIs.empty()) { - MachineInstr *MI = ClonedMIs.back(); - ClonedMIs.pop_back(); + while (!CloneMIs.empty()) { + MachineInstr *MI = CloneMIs.back(); + CloneMIs.pop_back(); mf_->DeleteMachineInstr(MI); } } +static bool CanTurnIntoImplicitDef(MachineInstr *MI, unsigned Reg, + unsigned OpIdx, const TargetInstrInfo *tii_){ + unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; + if (tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubReg, DstSubReg) && + Reg == SrcReg) + return true; + + if (OpIdx == 2 && MI->getOpcode() == TargetInstrInfo::SUBREG_TO_REG) + return true; + if (OpIdx == 1 && MI->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG) + return true; + return false; +} + /// processImplicitDefs - Process IMPLICIT_DEF instructions and make sure /// there is one implicit_def for each use. Add isUndef marker to /// implicit_def defs and their uses. @@ -119,16 +139,33 @@ void LiveIntervals::processImplicitDefs() { ++I; if (MI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF) { unsigned Reg = MI->getOperand(0).getReg(); - MI->getOperand(0).setIsUndef(); ImpDefRegs.insert(Reg); + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + for (const unsigned *SS = tri_->getSubRegisters(Reg); *SS; ++SS) + ImpDefRegs.insert(*SS); + } ImpDefMIs.push_back(MI); continue; } + if (MI->getOpcode() == TargetInstrInfo::INSERT_SUBREG) { + MachineOperand &MO = MI->getOperand(2); + if (ImpDefRegs.count(MO.getReg())) { + // %reg1032<def> = INSERT_SUBREG %reg1032, undef, 2 + // This is an identity copy, eliminate it now. 
+ if (MO.isKill()) { + LiveVariables::VarInfo& vi = lv_->getVarInfo(MO.getReg()); + vi.removeKill(MI); + } + MI->eraseFromParent(); + continue; + } + } + bool ChangedToImpDef = false; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand& MO = MI->getOperand(i); - if (!MO.isReg() || !MO.isUse()) + if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue; unsigned Reg = MO.getReg(); if (!Reg) @@ -136,22 +173,30 @@ void LiveIntervals::processImplicitDefs() { if (!ImpDefRegs.count(Reg)) continue; // Use is a copy, just turn it into an implicit_def. - unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; - if (tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubReg, DstSubReg) && - Reg == SrcReg) { + if (CanTurnIntoImplicitDef(MI, Reg, i, tii_)) { bool isKill = MO.isKill(); MI->setDesc(tii_->get(TargetInstrInfo::IMPLICIT_DEF)); for (int j = MI->getNumOperands() - 1, ee = 0; j > ee; --j) MI->RemoveOperand(j); - if (isKill) + if (isKill) { ImpDefRegs.erase(Reg); + LiveVariables::VarInfo& vi = lv_->getVarInfo(Reg); + vi.removeKill(MI); + } ChangedToImpDef = true; break; } MO.setIsUndef(); - if (MO.isKill() || MI->isRegTiedToDefOperand(i)) + if (MO.isKill() || MI->isRegTiedToDefOperand(i)) { + // Make sure other uses of + for (unsigned j = i+1; j != e; ++j) { + MachineOperand &MOJ = MI->getOperand(j); + if (MOJ.isReg() && MOJ.isUse() && MOJ.getReg() == Reg) + MOJ.setIsUndef(); + } ImpDefRegs.erase(Reg); + } } if (ChangedToImpDef) { @@ -171,11 +216,13 @@ void LiveIntervals::processImplicitDefs() { for (unsigned i = 0, e = ImpDefMIs.size(); i != e; ++i) { MachineInstr *MI = ImpDefMIs[i]; unsigned Reg = MI->getOperand(0).getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) - // Physical registers are not liveout (yet). - continue; - if (!ImpDefRegs.count(Reg)) + if (TargetRegisterInfo::isPhysicalRegister(Reg) || + !ImpDefRegs.count(Reg)) { + // Delete all "local" implicit_def's. 
That include those which define + // physical registers since they cannot be liveout. + MI->eraseFromParent(); continue; + } // If there are multiple defs of the same register and at least one // is not an implicit_def, do not insert implicit_def's before the @@ -191,6 +238,10 @@ void LiveIntervals::processImplicitDefs() { if (Skip) continue; + // The only implicit_def which we want to keep are those that are live + // out of its block. + MI->eraseFromParent(); + for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(Reg), UE = mri_->use_end(); UI != UE; ) { MachineOperand &RMO = UI.getOperand(); @@ -199,12 +250,19 @@ void LiveIntervals::processImplicitDefs() { MachineBasicBlock *RMBB = RMI->getParent(); if (RMBB == MBB) continue; + + // Turn a copy use into an implicit_def. + unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; + if (tii_->isMoveInstr(*RMI, SrcReg, DstReg, SrcSubReg, DstSubReg) && + Reg == SrcReg) { + RMI->setDesc(tii_->get(TargetInstrInfo::IMPLICIT_DEF)); + for (int j = RMI->getNumOperands() - 1, ee = 0; j > ee; --j) + RMI->RemoveOperand(j); + continue; + } + const TargetRegisterClass* RC = mri_->getRegClass(Reg); unsigned NewVReg = mri_->createVirtualRegister(RC); - MachineInstrBuilder MIB = - BuildMI(*RMBB, RMI, RMI->getDebugLoc(), - tii_->get(TargetInstrInfo::IMPLICIT_DEF), NewVReg); - (*MIB).getOperand(0).setIsUndef(); RMO.setReg(NewVReg); RMO.setIsUndef(); RMO.setIsKill(); @@ -215,6 +273,7 @@ void LiveIntervals::processImplicitDefs() { } } + void LiveIntervals::computeNumbering() { Index2MiMap OldI2MI = i2miMap_; std::vector<IdxMBBPair> OldI2MBB = Idx2MBBMap; @@ -223,44 +282,79 @@ void LiveIntervals::computeNumbering() { MBB2IdxMap.clear(); mi2iMap_.clear(); i2miMap_.clear(); + terminatorGaps.clear(); + phiJoinCopies.clear(); FunctionSize = 0; // Number MachineInstrs and MachineBasicBlocks. // Initialize MBB indexes to a sentinal. 
- MBB2IdxMap.resize(mf_->getNumBlockIDs(), std::make_pair(~0U,~0U)); + MBB2IdxMap.resize(mf_->getNumBlockIDs(), + std::make_pair(LiveIndex(),LiveIndex())); - unsigned MIIndex = 0; + LiveIndex MIIndex; for (MachineFunction::iterator MBB = mf_->begin(), E = mf_->end(); MBB != E; ++MBB) { - unsigned StartIdx = MIIndex; + LiveIndex StartIdx = MIIndex; // Insert an empty slot at the beginning of each block. - MIIndex += InstrSlots::NUM; + MIIndex = getNextIndex(MIIndex); i2miMap_.push_back(0); for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ++I) { + + if (I == MBB->getFirstTerminator()) { + // Leave a gap for before terminators, this is where we will point + // PHI kills. + LiveIndex tGap(true, MIIndex); + bool inserted = + terminatorGaps.insert(std::make_pair(&*MBB, tGap)).second; + assert(inserted && + "Multiple 'first' terminators encountered during numbering."); + inserted = inserted; // Avoid compiler warning if assertions turned off. + i2miMap_.push_back(0); + + MIIndex = getNextIndex(MIIndex); + } + bool inserted = mi2iMap_.insert(std::make_pair(I, MIIndex)).second; assert(inserted && "multiple MachineInstr -> index mappings"); inserted = true; i2miMap_.push_back(I); - MIIndex += InstrSlots::NUM; + MIIndex = getNextIndex(MIIndex); FunctionSize++; // Insert max(1, numdefs) empty slots after every instruction. unsigned Slots = I->getDesc().getNumDefs(); if (Slots == 0) Slots = 1; - MIIndex += InstrSlots::NUM * Slots; - while (Slots--) + while (Slots--) { + MIIndex = getNextIndex(MIIndex); i2miMap_.push_back(0); + } + + } + + if (MBB->getFirstTerminator() == MBB->end()) { + // Leave a gap for before terminators, this is where we will point + // PHI kills. + LiveIndex tGap(true, MIIndex); + bool inserted = + terminatorGaps.insert(std::make_pair(&*MBB, tGap)).second; + assert(inserted && + "Multiple 'first' terminators encountered during numbering."); + inserted = inserted; // Avoid compiler warning if assertions turned off. 
+ i2miMap_.push_back(0); + + MIIndex = getNextIndex(MIIndex); } // Set the MBB2IdxMap entry for this MBB. - MBB2IdxMap[MBB->getNumber()] = std::make_pair(StartIdx, MIIndex - 1); + MBB2IdxMap[MBB->getNumber()] = std::make_pair(StartIdx, getPrevSlot(MIIndex)); Idx2MBBMap.push_back(std::make_pair(StartIdx, MBB)); } + std::sort(Idx2MBBMap.begin(), Idx2MBBMap.end(), Idx2MBBCompare()); if (!OldI2MI.empty()) @@ -272,9 +366,9 @@ void LiveIntervals::computeNumbering() { // number, or our best guess at what it _should_ correspond to if the // original instruction has been erased. This is either the following // instruction or its predecessor. - unsigned index = LI->start / InstrSlots::NUM; - unsigned offset = LI->start % InstrSlots::NUM; - if (offset == InstrSlots::LOAD) { + unsigned index = LI->start.getVecIndex(); + LiveIndex::Slot offset = LI->start.getSlot(); + if (LI->start.isLoad()) { std::vector<IdxMBBPair>::const_iterator I = std::lower_bound(OldI2MBB.begin(), OldI2MBB.end(), LI->start); // Take the pair containing the index @@ -283,29 +377,34 @@ void LiveIntervals::computeNumbering() { LI->start = getMBBStartIdx(J->second); } else { - LI->start = mi2iMap_[OldI2MI[index]] + offset; + LI->start = LiveIndex( + LiveIndex(mi2iMap_[OldI2MI[index]]), + (LiveIndex::Slot)offset); } // Remap the ending index in the same way that we remapped the start, // except for the final step where we always map to the immediately // following instruction. - index = (LI->end - 1) / InstrSlots::NUM; - offset = LI->end % InstrSlots::NUM; - if (offset == InstrSlots::LOAD) { + index = (getPrevSlot(LI->end)).getVecIndex(); + offset = LI->end.getSlot(); + if (LI->end.isLoad()) { // VReg dies at end of block. 
std::vector<IdxMBBPair>::const_iterator I = std::lower_bound(OldI2MBB.begin(), OldI2MBB.end(), LI->end); --I; - LI->end = getMBBEndIdx(I->second) + 1; + LI->end = getNextSlot(getMBBEndIdx(I->second)); } else { unsigned idx = index; while (index < OldI2MI.size() && !OldI2MI[index]) ++index; if (index != OldI2MI.size()) - LI->end = mi2iMap_[OldI2MI[index]] + (idx == index ? offset : 0); + LI->end = + LiveIndex(mi2iMap_[OldI2MI[index]], + (idx == index ? offset : LiveIndex::LOAD)); else - LI->end = InstrSlots::NUM * i2miMap_.size(); + LI->end = + LiveIndex(LiveIndex::NUM * i2miMap_.size()); } } @@ -317,9 +416,9 @@ void LiveIntervals::computeNumbering() { // start indices above. VN's with special sentinel defs // don't need to be remapped. if (vni->isDefAccurate() && !vni->isUnused()) { - unsigned index = vni->def / InstrSlots::NUM; - unsigned offset = vni->def % InstrSlots::NUM; - if (offset == InstrSlots::LOAD) { + unsigned index = vni->def.getVecIndex(); + LiveIndex::Slot offset = vni->def.getSlot(); + if (vni->def.isLoad()) { std::vector<IdxMBBPair>::const_iterator I = std::lower_bound(OldI2MBB.begin(), OldI2MBB.end(), vni->def); // Take the pair containing the index @@ -328,25 +427,36 @@ void LiveIntervals::computeNumbering() { vni->def = getMBBStartIdx(J->second); } else { - vni->def = mi2iMap_[OldI2MI[index]] + offset; + vni->def = LiveIndex(mi2iMap_[OldI2MI[index]], offset); } } // Remap the VNInfo kill indices, which works the same as // the end indices above. for (size_t i = 0; i < vni->kills.size(); ++i) { - // PHI kills don't need to be remapped. 
- if (!vni->kills[i]) continue; - - unsigned index = (vni->kills[i]-1) / InstrSlots::NUM; - unsigned offset = vni->kills[i] % InstrSlots::NUM; - if (offset == InstrSlots::LOAD) { - std::vector<IdxMBBPair>::const_iterator I = + unsigned index = getPrevSlot(vni->kills[i]).getVecIndex(); + LiveIndex::Slot offset = vni->kills[i].getSlot(); + + if (vni->kills[i].isLoad()) { + assert("Value killed at a load slot."); + /*std::vector<IdxMBBPair>::const_iterator I = std::lower_bound(OldI2MBB.begin(), OldI2MBB.end(), vni->kills[i]); --I; - vni->kills[i] = getMBBEndIdx(I->second); + vni->kills[i] = getMBBEndIdx(I->second);*/ } else { + if (vni->kills[i].isPHIIndex()) { + std::vector<IdxMBBPair>::const_iterator I = + std::lower_bound(OldI2MBB.begin(), OldI2MBB.end(), vni->kills[i]); + --I; + vni->kills[i] = terminatorGaps[I->second]; + } else { + assert(OldI2MI[index] != 0 && + "Kill refers to instruction not present in index maps."); + vni->kills[i] = LiveIndex(mi2iMap_[OldI2MI[index]], offset); + } + + /* unsigned idx = index; while (index < OldI2MI.size() && !OldI2MI[index]) ++index; @@ -355,6 +465,7 @@ void LiveIntervals::computeNumbering() { (idx == index ? offset : 0); else vni->kills[i] = InstrSlots::NUM * i2miMap_.size(); + */ } } } @@ -372,13 +483,20 @@ void LiveIntervals::scaleNumbering(int factor) { Idx2MBBMap.clear(); for (MachineFunction::iterator MBB = mf_->begin(), MBBE = mf_->end(); MBB != MBBE; ++MBB) { - std::pair<unsigned, unsigned> &mbbIndices = MBB2IdxMap[MBB->getNumber()]; - mbbIndices.first = InstrSlots::scale(mbbIndices.first, factor); - mbbIndices.second = InstrSlots::scale(mbbIndices.second, factor); + std::pair<LiveIndex, LiveIndex> &mbbIndices = MBB2IdxMap[MBB->getNumber()]; + mbbIndices.first = mbbIndices.first.scale(factor); + mbbIndices.second = mbbIndices.second.scale(factor); Idx2MBBMap.push_back(std::make_pair(mbbIndices.first, MBB)); } std::sort(Idx2MBBMap.begin(), Idx2MBBMap.end(), Idx2MBBCompare()); + // Scale terminator gaps. 
+ for (DenseMap<MachineBasicBlock*, LiveIndex>::iterator + TGI = terminatorGaps.begin(), TGE = terminatorGaps.end(); + TGI != TGE; ++TGI) { + terminatorGaps[TGI->first] = TGI->second.scale(factor); + } + // Scale the intervals. for (iterator LI = begin(), LE = end(); LI != LE; ++LI) { LI->second->scaleNumbering(factor); @@ -386,19 +504,20 @@ void LiveIntervals::scaleNumbering(int factor) { // Scale MachineInstrs. Mi2IndexMap oldmi2iMap = mi2iMap_; - unsigned highestSlot = 0; + LiveIndex highestSlot; for (Mi2IndexMap::iterator MI = oldmi2iMap.begin(), ME = oldmi2iMap.end(); MI != ME; ++MI) { - unsigned newSlot = InstrSlots::scale(MI->second, factor); + LiveIndex newSlot = MI->second.scale(factor); mi2iMap_[MI->first] = newSlot; highestSlot = std::max(highestSlot, newSlot); } + unsigned highestVIndex = highestSlot.getVecIndex(); i2miMap_.clear(); - i2miMap_.resize(highestSlot + 1); + i2miMap_.resize(highestVIndex + 1); for (Mi2IndexMap::iterator MI = mi2iMap_.begin(), ME = mi2iMap_.end(); MI != ME; ++MI) { - i2miMap_[MI->second] = MI->first; + i2miMap_[MI->second.getVecIndex()] = const_cast<MachineInstr *>(MI->first); } } @@ -419,6 +538,7 @@ bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) { processImplicitDefs(); computeNumbering(); computeIntervals(); + performEarlyCoalescing(); numIntervals += getNumIntervals(); @@ -427,36 +547,45 @@ bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) { } /// print - Implement the dump method. 
-void LiveIntervals::print(std::ostream &O, const Module* ) const { - O << "********** INTERVALS **********\n"; +void LiveIntervals::print(raw_ostream &OS, const Module* ) const { + OS << "********** INTERVALS **********\n"; for (const_iterator I = begin(), E = end(); I != E; ++I) { - I->second->print(O, tri_); - O << "\n"; + I->second->print(OS, tri_); + OS << "\n"; } - O << "********** MACHINEINSTRS **********\n"; + printInstrs(OS); +} + +void LiveIntervals::printInstrs(raw_ostream &OS) const { + OS << "********** MACHINEINSTRS **********\n"; + for (MachineFunction::iterator mbbi = mf_->begin(), mbbe = mf_->end(); mbbi != mbbe; ++mbbi) { - O << ((Value*)mbbi->getBasicBlock())->getName() << ":\n"; + OS << ((Value*)mbbi->getBasicBlock())->getName() << ":\n"; for (MachineBasicBlock::iterator mii = mbbi->begin(), mie = mbbi->end(); mii != mie; ++mii) { - O << getInstructionIndex(mii) << '\t' << *mii; + OS << getInstructionIndex(mii) << '\t' << *mii; } } } +void LiveIntervals::dumpInstrs() const { + printInstrs(errs()); +} + /// conflictsWithPhysRegDef - Returns true if the specified register /// is defined during the duration of the specified interval. 
bool LiveIntervals::conflictsWithPhysRegDef(const LiveInterval &li, VirtRegMap &vrm, unsigned reg) { for (LiveInterval::Ranges::const_iterator I = li.ranges.begin(), E = li.ranges.end(); I != E; ++I) { - for (unsigned index = getBaseIndex(I->start), - end = getBaseIndex(I->end-1) + InstrSlots::NUM; index != end; - index += InstrSlots::NUM) { + for (LiveIndex index = getBaseIndex(I->start), + end = getNextIndex(getBaseIndex(getPrevSlot(I->end))); index != end; + index = getNextIndex(index)) { // skip deleted instructions while (index != end && !getInstructionFromIndex(index)) - index += InstrSlots::NUM; + index = getNextIndex(index); if (index == end) break; MachineInstr *MI = getInstructionFromIndex(index); @@ -492,16 +621,16 @@ bool LiveIntervals::conflictsWithPhysRegRef(LiveInterval &li, SmallPtrSet<MachineInstr*,32> &JoinedCopies) { for (LiveInterval::Ranges::const_iterator I = li.ranges.begin(), E = li.ranges.end(); I != E; ++I) { - for (unsigned index = getBaseIndex(I->start), - end = getBaseIndex(I->end-1) + InstrSlots::NUM; index != end; - index += InstrSlots::NUM) { + for (LiveIndex index = getBaseIndex(I->start), + end = getNextIndex(getBaseIndex(getPrevSlot(I->end))); index != end; + index = getNextIndex(index)) { // Skip deleted instructions. 
MachineInstr *MI = 0; while (index != end) { MI = getInstructionFromIndex(index); if (MI) break; - index += InstrSlots::NUM; + index = getNextIndex(index); } if (index == end) break; @@ -525,35 +654,36 @@ bool LiveIntervals::conflictsWithPhysRegRef(LiveInterval &li, return false; } - -void LiveIntervals::printRegName(unsigned reg) const { +#ifndef NDEBUG +static void printRegName(unsigned reg, const TargetRegisterInfo* tri_) { if (TargetRegisterInfo::isPhysicalRegister(reg)) - cerr << tri_->getName(reg); + errs() << tri_->getName(reg); else - cerr << "%reg" << reg; + errs() << "%reg" << reg; } +#endif void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, MachineBasicBlock::iterator mi, - unsigned MIIdx, MachineOperand& MO, + LiveIndex MIIdx, + MachineOperand& MO, unsigned MOIdx, LiveInterval &interval) { - DOUT << "\t\tregister: "; DEBUG(printRegName(interval.reg)); - LiveVariables::VarInfo& vi = lv_->getVarInfo(interval.reg); - - if (mi->getOpcode() == TargetInstrInfo::IMPLICIT_DEF) { - DOUT << "is a implicit_def\n"; - return; - } + DEBUG({ + errs() << "\t\tregister: "; + printRegName(interval.reg, tri_); + }); // Virtual registers may be defined multiple times (due to phi // elimination and 2-addr elimination). Much of what we do only has to be // done once for the vreg. We use an empty interval to detect the first // time we see a vreg. + LiveVariables::VarInfo& vi = lv_->getVarInfo(interval.reg); if (interval.empty()) { // Get the Idx of the defining instructions. - unsigned defIndex = getDefIndex(MIIdx); - // Earlyclobbers move back one. + LiveIndex defIndex = getDefIndex(MIIdx); + // Earlyclobbers move back one, so that they overlap the live range + // of inputs. if (MO.isEarlyClobber()) defIndex = getUseIndex(MIIdx); VNInfo *ValNo; @@ -575,11 +705,16 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, // will be a single kill, in MBB, which comes after the definition. 
if (vi.Kills.size() == 1 && vi.Kills[0]->getParent() == mbb) { // FIXME: what about dead vars? - unsigned killIdx; + LiveIndex killIdx; if (vi.Kills[0] != mi) - killIdx = getUseIndex(getInstructionIndex(vi.Kills[0]))+1; + killIdx = getNextSlot(getUseIndex(getInstructionIndex(vi.Kills[0]))); + else if (MO.isEarlyClobber()) + // Earlyclobbers that die in this instruction move up one extra, to + // compensate for having the starting point moved back one. This + // gets them to overlap the live range of other outputs. + killIdx = getNextSlot(getNextSlot(defIndex)); else - killIdx = defIndex+1; + killIdx = getNextSlot(defIndex); // If the kill happens after the definition, we have an intra-block // live range. @@ -588,8 +723,8 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, "Shouldn't be alive across any blocks!"); LiveRange LR(defIndex, killIdx, ValNo); interval.addRange(LR); - DOUT << " +" << LR << "\n"; - interval.addKill(ValNo, killIdx); + DEBUG(errs() << " +" << LR << "\n"); + ValNo->addKill(killIdx); return; } } @@ -598,8 +733,8 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, // of the defining block, potentially live across some blocks, then is // live into some number of blocks, but gets killed. Start by adding a // range that goes from this definition to the end of the defining block. - LiveRange NewLR(defIndex, getMBBEndIdx(mbb)+1, ValNo); - DOUT << " +" << NewLR; + LiveRange NewLR(defIndex, getNextSlot(getMBBEndIdx(mbb)), ValNo); + DEBUG(errs() << " +" << NewLR); interval.addRange(NewLR); // Iterate over all of the blocks that the variable is completely @@ -608,22 +743,22 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, for (SparseBitVector<>::iterator I = vi.AliveBlocks.begin(), E = vi.AliveBlocks.end(); I != E; ++I) { LiveRange LR(getMBBStartIdx(*I), - getMBBEndIdx(*I)+1, // MBB ends at -1. + getNextSlot(getMBBEndIdx(*I)), // MBB ends at -1. 
ValNo); interval.addRange(LR); - DOUT << " +" << LR; + DEBUG(errs() << " +" << LR); } // Finally, this virtual register is live from the start of any killing // block to the 'use' slot of the killing instruction. for (unsigned i = 0, e = vi.Kills.size(); i != e; ++i) { MachineInstr *Kill = vi.Kills[i]; - unsigned killIdx = getUseIndex(getInstructionIndex(Kill))+1; - LiveRange LR(getMBBStartIdx(Kill->getParent()), - killIdx, ValNo); + LiveIndex killIdx = + getNextSlot(getUseIndex(getInstructionIndex(Kill))); + LiveRange LR(getMBBStartIdx(Kill->getParent()), killIdx, ValNo); interval.addRange(LR); - interval.addKill(ValNo, killIdx); - DOUT << " +" << LR; + ValNo->addKill(killIdx); + DEBUG(errs() << " +" << LR); } } else { @@ -638,12 +773,13 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, // need to take the LiveRegion that defines this register and split it // into two values. assert(interval.containsOneValue()); - unsigned DefIndex = getDefIndex(interval.getValNumInfo(0)->def); - unsigned RedefIndex = getDefIndex(MIIdx); + LiveIndex DefIndex = getDefIndex(interval.getValNumInfo(0)->def); + LiveIndex RedefIndex = getDefIndex(MIIdx); if (MO.isEarlyClobber()) RedefIndex = getUseIndex(MIIdx); - const LiveRange *OldLR = interval.getLiveRangeContaining(RedefIndex-1); + const LiveRange *OldLR = + interval.getLiveRangeContaining(getPrevSlot(RedefIndex)); VNInfo *OldValNo = OldLR->valno; // Delete the initial value, which should be short and continuous, @@ -656,68 +792,85 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, // The new value number (#1) is defined by the instruction we claimed // defined value #0. - VNInfo *ValNo = interval.getNextValue(OldValNo->def, OldValNo->copy, + VNInfo *ValNo = interval.getNextValue(OldValNo->def, OldValNo->getCopy(), false, // update at * VNInfoAllocator); ValNo->setFlags(OldValNo->getFlags()); // * <- updating here // Value#0 is now defined by the 2-addr instruction. 
OldValNo->def = RedefIndex; - OldValNo->copy = 0; + OldValNo->setCopy(0); if (MO.isEarlyClobber()) OldValNo->setHasRedefByEC(true); // Add the new live interval which replaces the range for the input copy. LiveRange LR(DefIndex, RedefIndex, ValNo); - DOUT << " replace range with " << LR; + DEBUG(errs() << " replace range with " << LR); interval.addRange(LR); - interval.addKill(ValNo, RedefIndex); + ValNo->addKill(RedefIndex); // If this redefinition is dead, we need to add a dummy unit live // range covering the def slot. if (MO.isDead()) - interval.addRange(LiveRange(RedefIndex, RedefIndex+1, OldValNo)); - - DOUT << " RESULT: "; - interval.print(DOUT, tri_); - + interval.addRange( + LiveRange(RedefIndex, MO.isEarlyClobber() ? + getNextSlot(getNextSlot(RedefIndex)) : + getNextSlot(RedefIndex), OldValNo)); + + DEBUG({ + errs() << " RESULT: "; + interval.print(errs(), tri_); + }); } else { // Otherwise, this must be because of phi elimination. If this is the // first redefinition of the vreg that we have seen, go back and change // the live range in the PHI block to be a different value number. if (interval.containsOneValue()) { - assert(vi.Kills.size() == 1 && - "PHI elimination vreg should have one kill, the PHI itself!"); - // Remove the old range that we now know has an incorrect number. 
VNInfo *VNI = interval.getValNumInfo(0); MachineInstr *Killer = vi.Kills[0]; - unsigned Start = getMBBStartIdx(Killer->getParent()); - unsigned End = getUseIndex(getInstructionIndex(Killer))+1; - DOUT << " Removing [" << Start << "," << End << "] from: "; - interval.print(DOUT, tri_); DOUT << "\n"; - interval.removeRange(Start, End); + phiJoinCopies.push_back(Killer); + LiveIndex Start = getMBBStartIdx(Killer->getParent()); + LiveIndex End = + getNextSlot(getUseIndex(getInstructionIndex(Killer))); + DEBUG({ + errs() << " Removing [" << Start << "," << End << "] from: "; + interval.print(errs(), tri_); + errs() << "\n"; + }); + interval.removeRange(Start, End); + assert(interval.ranges.size() == 1 && + "Newly discovered PHI interval has >1 ranges."); + MachineBasicBlock *killMBB = getMBBFromIndex(interval.endIndex()); + VNI->addKill(terminatorGaps[killMBB]); VNI->setHasPHIKill(true); - DOUT << " RESULT: "; interval.print(DOUT, tri_); + DEBUG({ + errs() << " RESULT: "; + interval.print(errs(), tri_); + }); // Replace the interval with one of a NEW value number. Note that this // value number isn't actually defined by an instruction, weird huh? :) LiveRange LR(Start, End, - interval.getNextValue(mbb->getNumber(), 0, false, VNInfoAllocator)); + interval.getNextValue(LiveIndex(mbb->getNumber()), + 0, false, VNInfoAllocator)); LR.valno->setIsPHIDef(true); - DOUT << " replace range with " << LR; + DEBUG(errs() << " replace range with " << LR); interval.addRange(LR); - interval.addKill(LR.valno, End); - DOUT << " RESULT: "; interval.print(DOUT, tri_); + LR.valno->addKill(End); + DEBUG({ + errs() << " RESULT: "; + interval.print(errs(), tri_); + }); } // In the case of PHI elimination, each variable definition is only // live until the end of the block. We've already taken care of the // rest of the live range. 
- unsigned defIndex = getDefIndex(MIIdx); + LiveIndex defIndex = getDefIndex(MIIdx); if (MO.isEarlyClobber()) defIndex = getUseIndex(MIIdx); - + VNInfo *ValNo; MachineInstr *CopyMI = NULL; unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; @@ -728,55 +881,63 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, CopyMI = mi; ValNo = interval.getNextValue(defIndex, CopyMI, true, VNInfoAllocator); - unsigned killIndex = getMBBEndIdx(mbb) + 1; + LiveIndex killIndex = getNextSlot(getMBBEndIdx(mbb)); LiveRange LR(defIndex, killIndex, ValNo); interval.addRange(LR); - interval.addKill(ValNo, killIndex); + ValNo->addKill(terminatorGaps[mbb]); ValNo->setHasPHIKill(true); - DOUT << " +" << LR; + DEBUG(errs() << " +" << LR); } } - DOUT << '\n'; + DEBUG(errs() << '\n'); } void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB, MachineBasicBlock::iterator mi, - unsigned MIIdx, + LiveIndex MIIdx, MachineOperand& MO, LiveInterval &interval, MachineInstr *CopyMI) { // A physical register cannot be live across basic block, so its // lifetime must end somewhere in its defining basic block. - DOUT << "\t\tregister: "; DEBUG(printRegName(interval.reg)); + DEBUG({ + errs() << "\t\tregister: "; + printRegName(interval.reg, tri_); + }); - unsigned baseIndex = MIIdx; - unsigned start = getDefIndex(baseIndex); + LiveIndex baseIndex = MIIdx; + LiveIndex start = getDefIndex(baseIndex); // Earlyclobbers move back one. if (MO.isEarlyClobber()) start = getUseIndex(MIIdx); - unsigned end = start; + LiveIndex end = start; // If it is not used after definition, it is considered dead at // the instruction defining it. Hence its interval is: // [defSlot(def), defSlot(def)+1) + // For earlyclobbers, the defSlot was pushed back one; the extra + // advance below compensates. 
if (MO.isDead()) { - DOUT << " dead"; - end = start + 1; + DEBUG(errs() << " dead"); + if (MO.isEarlyClobber()) + end = getNextSlot(getNextSlot(start)); + else + end = getNextSlot(start); goto exit; } // If it is not dead on definition, it must be killed by a // subsequent instruction. Hence its interval is: // [defSlot(def), useSlot(kill)+1) - baseIndex += InstrSlots::NUM; + baseIndex = getNextIndex(baseIndex); while (++mi != MBB->end()) { - while (baseIndex / InstrSlots::NUM < i2miMap_.size() && + while (baseIndex.getVecIndex() < i2miMap_.size() && getInstructionFromIndex(baseIndex) == 0) - baseIndex += InstrSlots::NUM; + baseIndex = getNextIndex(baseIndex); if (mi->killsRegister(interval.reg, tri_)) { - DOUT << " killed"; - end = getUseIndex(baseIndex) + 1; + DEBUG(errs() << " killed"); + end = getNextSlot(getUseIndex(baseIndex)); goto exit; } else { int DefIdx = mi->findRegisterDefOperandIdx(interval.reg, false, tri_); @@ -791,21 +952,21 @@ void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB, // Then the register is essentially dead at the instruction that defines // it. Hence its interval is: // [defSlot(def), defSlot(def)+1) - DOUT << " dead"; - end = start + 1; + DEBUG(errs() << " dead"); + end = getNextSlot(start); } goto exit; } } - baseIndex += InstrSlots::NUM; + baseIndex = getNextIndex(baseIndex); } // The only case we should have a dead physreg here without a killing or // instruction where we know it's dead is if it is live-in to the function // and never used. Another possible case is the implicit use of the // physical register has been deleted by two-address pass. 
- end = start + 1; + end = getNextSlot(start); exit: assert(start < end && "did not find end of interval?"); @@ -819,13 +980,13 @@ exit: ValNo->setHasRedefByEC(true); LiveRange LR(start, end, ValNo); interval.addRange(LR); - interval.addKill(LR.valno, end); - DOUT << " +" << LR << '\n'; + LR.valno->addKill(end); + DEBUG(errs() << " +" << LR << '\n'); } void LiveIntervals::handleRegisterDef(MachineBasicBlock *MBB, MachineBasicBlock::iterator MI, - unsigned MIIdx, + LiveIndex MIIdx, MachineOperand& MO, unsigned MOIdx) { if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) @@ -852,25 +1013,28 @@ void LiveIntervals::handleRegisterDef(MachineBasicBlock *MBB, } void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB, - unsigned MIIdx, + LiveIndex MIIdx, LiveInterval &interval, bool isAlias) { - DOUT << "\t\tlivein register: "; DEBUG(printRegName(interval.reg)); + DEBUG({ + errs() << "\t\tlivein register: "; + printRegName(interval.reg, tri_); + }); // Look for kills, if it reaches a def before it's killed, then it shouldn't // be considered a livein. 
MachineBasicBlock::iterator mi = MBB->begin(); - unsigned baseIndex = MIIdx; - unsigned start = baseIndex; - while (baseIndex / InstrSlots::NUM < i2miMap_.size() && + LiveIndex baseIndex = MIIdx; + LiveIndex start = baseIndex; + while (baseIndex.getVecIndex() < i2miMap_.size() && getInstructionFromIndex(baseIndex) == 0) - baseIndex += InstrSlots::NUM; - unsigned end = baseIndex; + baseIndex = getNextIndex(baseIndex); + LiveIndex end = baseIndex; bool SeenDefUse = false; while (mi != MBB->end()) { if (mi->killsRegister(interval.reg, tri_)) { - DOUT << " killed"; - end = getUseIndex(baseIndex) + 1; + DEBUG(errs() << " killed"); + end = getNextSlot(getUseIndex(baseIndex)); SeenDefUse = true; break; } else if (mi->modifiesRegister(interval.reg, tri_)) { @@ -878,40 +1042,167 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB, // Then the register is essentially dead at the instruction that defines // it. Hence its interval is: // [defSlot(def), defSlot(def)+1) - DOUT << " dead"; - end = getDefIndex(start) + 1; + DEBUG(errs() << " dead"); + end = getNextSlot(getDefIndex(start)); SeenDefUse = true; break; } - baseIndex += InstrSlots::NUM; + baseIndex = getNextIndex(baseIndex); ++mi; if (mi != MBB->end()) { - while (baseIndex / InstrSlots::NUM < i2miMap_.size() && + while (baseIndex.getVecIndex() < i2miMap_.size() && getInstructionFromIndex(baseIndex) == 0) - baseIndex += InstrSlots::NUM; + baseIndex = getNextIndex(baseIndex); } } // Live-in register might not be used at all. 
if (!SeenDefUse) { if (isAlias) { - DOUT << " dead"; - end = getDefIndex(MIIdx) + 1; + DEBUG(errs() << " dead"); + end = getNextSlot(getDefIndex(MIIdx)); } else { - DOUT << " live through"; + DEBUG(errs() << " live through"); end = baseIndex; } } VNInfo *vni = - interval.getNextValue(MBB->getNumber(), 0, false, VNInfoAllocator); + interval.getNextValue(LiveIndex(MBB->getNumber()), + 0, false, VNInfoAllocator); vni->setIsPHIDef(true); LiveRange LR(start, end, vni); interval.addRange(LR); - interval.addKill(LR.valno, end); - DOUT << " +" << LR << '\n'; + LR.valno->addKill(end); + DEBUG(errs() << " +" << LR << '\n'); +} + +bool +LiveIntervals::isProfitableToCoalesce(LiveInterval &DstInt, LiveInterval &SrcInt, + SmallVector<MachineInstr*,16> &IdentCopies, + SmallVector<MachineInstr*,16> &OtherCopies) { + bool HaveConflict = false; + unsigned NumIdent = 0; + for (MachineRegisterInfo::def_iterator ri = mri_->def_begin(SrcInt.reg), + re = mri_->def_end(); ri != re; ++ri) { + MachineInstr *MI = &*ri; + unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; + if (!tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubReg, DstSubReg)) + return false; + if (SrcReg != DstInt.reg) { + OtherCopies.push_back(MI); + HaveConflict |= DstInt.liveAt(getInstructionIndex(MI)); + } else { + IdentCopies.push_back(MI); + ++NumIdent; + } + } + + if (!HaveConflict) + return false; // Let coalescer handle it + return IdentCopies.size() > OtherCopies.size(); +} + +void LiveIntervals::performEarlyCoalescing() { + if (!EarlyCoalescing) + return; + + /// Perform early coalescing: eliminate copies which feed into phi joins + /// and whose sources are defined by the phi joins. 
+ for (unsigned i = 0, e = phiJoinCopies.size(); i != e; ++i) { + MachineInstr *Join = phiJoinCopies[i]; + if (CoalescingLimit != -1 && (int)numCoalescing == CoalescingLimit) + break; + + unsigned PHISrc, PHIDst, SrcSubReg, DstSubReg; + bool isMove= tii_->isMoveInstr(*Join, PHISrc, PHIDst, SrcSubReg, DstSubReg); +#ifndef NDEBUG + assert(isMove && "PHI join instruction must be a move!"); +#else + isMove = isMove; +#endif + + LiveInterval &DstInt = getInterval(PHIDst); + LiveInterval &SrcInt = getInterval(PHISrc); + SmallVector<MachineInstr*, 16> IdentCopies; + SmallVector<MachineInstr*, 16> OtherCopies; + if (!isProfitableToCoalesce(DstInt, SrcInt, IdentCopies, OtherCopies)) + continue; + + DEBUG(errs() << "PHI Join: " << *Join); + assert(DstInt.containsOneValue() && "PHI join should have just one val#!"); + VNInfo *VNI = DstInt.getValNumInfo(0); + + // Change the non-identity copies to directly target the phi destination. + for (unsigned i = 0, e = OtherCopies.size(); i != e; ++i) { + MachineInstr *PHICopy = OtherCopies[i]; + DEBUG(errs() << "Moving: " << *PHICopy); + + LiveIndex MIIndex = getInstructionIndex(PHICopy); + LiveIndex DefIndex = getDefIndex(MIIndex); + LiveRange *SLR = SrcInt.getLiveRangeContaining(DefIndex); + LiveIndex StartIndex = SLR->start; + LiveIndex EndIndex = SLR->end; + + // Delete val# defined by the now identity copy and add the range from + // beginning of the mbb to the end of the range. 
+ SrcInt.removeValNo(SLR->valno); + DEBUG(errs() << " added range [" << StartIndex << ',' + << EndIndex << "] to reg" << DstInt.reg << '\n'); + if (DstInt.liveAt(StartIndex)) + DstInt.removeRange(StartIndex, EndIndex); + VNInfo *NewVNI = DstInt.getNextValue(DefIndex, PHICopy, true, + VNInfoAllocator); + NewVNI->setHasPHIKill(true); + DstInt.addRange(LiveRange(StartIndex, EndIndex, NewVNI)); + for (unsigned j = 0, ee = PHICopy->getNumOperands(); j != ee; ++j) { + MachineOperand &MO = PHICopy->getOperand(j); + if (!MO.isReg() || MO.getReg() != PHISrc) + continue; + MO.setReg(PHIDst); + } + } + + // Now let's eliminate all the would-be identity copies. + for (unsigned i = 0, e = IdentCopies.size(); i != e; ++i) { + MachineInstr *PHICopy = IdentCopies[i]; + DEBUG(errs() << "Coalescing: " << *PHICopy); + + LiveIndex MIIndex = getInstructionIndex(PHICopy); + LiveIndex DefIndex = getDefIndex(MIIndex); + LiveRange *SLR = SrcInt.getLiveRangeContaining(DefIndex); + LiveIndex StartIndex = SLR->start; + LiveIndex EndIndex = SLR->end; + + // Delete val# defined by the now identity copy and add the range from + // beginning of the mbb to the end of the range. + SrcInt.removeValNo(SLR->valno); + RemoveMachineInstrFromMaps(PHICopy); + PHICopy->eraseFromParent(); + DEBUG(errs() << " added range [" << StartIndex << ',' + << EndIndex << "] to reg" << DstInt.reg << '\n'); + DstInt.addRange(LiveRange(StartIndex, EndIndex, VNI)); + } + + // Remove the phi join and update the phi block liveness. 
+ LiveIndex MIIndex = getInstructionIndex(Join); + LiveIndex UseIndex = getUseIndex(MIIndex); + LiveIndex DefIndex = getDefIndex(MIIndex); + LiveRange *SLR = SrcInt.getLiveRangeContaining(UseIndex); + LiveRange *DLR = DstInt.getLiveRangeContaining(DefIndex); + DLR->valno->setCopy(0); + DLR->valno->setIsDefAccurate(false); + DstInt.addRange(LiveRange(SLR->start, SLR->end, DLR->valno)); + SrcInt.removeRange(SLR->start, SLR->end); + assert(SrcInt.empty()); + removeInterval(PHISrc); + RemoveMachineInstrFromMaps(Join); + Join->eraseFromParent(); + + ++numCoalescing; + } } /// computeIntervals - computes the live intervals for virtual @@ -919,17 +1210,17 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB, /// live interval is an interval [i, j) where 1 <= i <= j < N for /// which a variable is live void LiveIntervals::computeIntervals() { + DEBUG(errs() << "********** COMPUTING LIVE INTERVALS **********\n" + << "********** Function: " + << ((Value*)mf_->getFunction())->getName() << '\n'); - DOUT << "********** COMPUTING LIVE INTERVALS **********\n" - << "********** Function: " - << ((Value*)mf_->getFunction())->getName() << '\n'; - + SmallVector<unsigned, 8> UndefUses; for (MachineFunction::iterator MBBI = mf_->begin(), E = mf_->end(); MBBI != E; ++MBBI) { MachineBasicBlock *MBB = MBBI; // Track the index of the current machine instr. - unsigned MIIndex = getMBBStartIdx(MBB); - DOUT << ((Value*)MBB->getBasicBlock())->getName() << ":\n"; + LiveIndex MIIndex = getMBBStartIdx(MBB); + DEBUG(errs() << ((Value*)MBB->getBasicBlock())->getName() << ":\n"); MachineBasicBlock::iterator MI = MBB->begin(), miEnd = MBB->end(); @@ -945,37 +1236,52 @@ void LiveIntervals::computeIntervals() { } // Skip over empty initial indices. 
- while (MIIndex / InstrSlots::NUM < i2miMap_.size() && + while (MIIndex.getVecIndex() < i2miMap_.size() && getInstructionFromIndex(MIIndex) == 0) - MIIndex += InstrSlots::NUM; + MIIndex = getNextIndex(MIIndex); for (; MI != miEnd; ++MI) { - DOUT << MIIndex << "\t" << *MI; + DEBUG(errs() << MIIndex << "\t" << *MI); // Handle defs. for (int i = MI->getNumOperands() - 1; i >= 0; --i) { MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.getReg()) + continue; + // handle register defs - build intervals - if (MO.isReg() && MO.getReg() && MO.isDef()) { + if (MO.isDef()) handleRegisterDef(MBB, MI, MIIndex, MO, i); - } + else if (MO.isUndef()) + UndefUses.push_back(MO.getReg()); } // Skip over the empty slots after each instruction. unsigned Slots = MI->getDesc().getNumDefs(); if (Slots == 0) Slots = 1; - MIIndex += InstrSlots::NUM * Slots; + + while (Slots--) + MIIndex = getNextIndex(MIIndex); // Skip over empty indices. - while (MIIndex / InstrSlots::NUM < i2miMap_.size() && + while (MIIndex.getVecIndex() < i2miMap_.size() && getInstructionFromIndex(MIIndex) == 0) - MIIndex += InstrSlots::NUM; + MIIndex = getNextIndex(MIIndex); } } + + // Create empty intervals for registers defined by implicit_def's (except + // for those implicit_def that define values which are liveout of their + // blocks. 
+ for (unsigned i = 0, e = UndefUses.size(); i != e; ++i) { + unsigned UndefReg = UndefUses[i]; + (void)getOrCreateInterval(UndefReg); + } } -bool LiveIntervals::findLiveInMBBs(unsigned Start, unsigned End, +bool LiveIntervals::findLiveInMBBs( + LiveIndex Start, LiveIndex End, SmallVectorImpl<MachineBasicBlock*> &MBBs) const { std::vector<IdxMBBPair>::const_iterator I = std::lower_bound(Idx2MBBMap.begin(), Idx2MBBMap.end(), Start); @@ -991,7 +1297,8 @@ bool LiveIntervals::findLiveInMBBs(unsigned Start, unsigned End, return ResVal; } -bool LiveIntervals::findReachableMBBs(unsigned Start, unsigned End, +bool LiveIntervals::findReachableMBBs( + LiveIndex Start, LiveIndex End, SmallVectorImpl<MachineBasicBlock*> &MBBs) const { std::vector<IdxMBBPair>::const_iterator I = std::lower_bound(Idx2MBBMap.begin(), Idx2MBBMap.end(), Start); @@ -1028,23 +1335,23 @@ LiveInterval* LiveIntervals::dupInterval(LiveInterval *li) { /// getVNInfoSourceReg - Helper function that parses the specified VNInfo /// copy field and returns the source register that defines it. unsigned LiveIntervals::getVNInfoSourceReg(const VNInfo *VNI) const { - if (!VNI->copy) + if (!VNI->getCopy()) return 0; - if (VNI->copy->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG) { + if (VNI->getCopy()->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG) { // If it's extracting out of a physical register, return the sub-register. 
- unsigned Reg = VNI->copy->getOperand(1).getReg(); + unsigned Reg = VNI->getCopy()->getOperand(1).getReg(); if (TargetRegisterInfo::isPhysicalRegister(Reg)) - Reg = tri_->getSubReg(Reg, VNI->copy->getOperand(2).getImm()); + Reg = tri_->getSubReg(Reg, VNI->getCopy()->getOperand(2).getImm()); return Reg; - } else if (VNI->copy->getOpcode() == TargetInstrInfo::INSERT_SUBREG || - VNI->copy->getOpcode() == TargetInstrInfo::SUBREG_TO_REG) - return VNI->copy->getOperand(2).getReg(); + } else if (VNI->getCopy()->getOpcode() == TargetInstrInfo::INSERT_SUBREG || + VNI->getCopy()->getOpcode() == TargetInstrInfo::SUBREG_TO_REG) + return VNI->getCopy()->getOperand(2).getReg(); unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; - if (tii_->isMoveInstr(*VNI->copy, SrcReg, DstReg, SrcSubReg, DstSubReg)) + if (tii_->isMoveInstr(*VNI->getCopy(), SrcReg, DstReg, SrcSubReg, DstSubReg)) return SrcReg; - assert(0 && "Unrecognized copy instruction!"); + llvm_unreachable("Unrecognized copy instruction!"); return 0; } @@ -1083,8 +1390,8 @@ unsigned LiveIntervals::getReMatImplicitUse(const LiveInterval &li, /// isValNoAvailableAt - Return true if the val# of the specified interval /// which reaches the given instruction also reaches the specified use index. 
bool LiveIntervals::isValNoAvailableAt(const LiveInterval &li, MachineInstr *MI, - unsigned UseIdx) const { - unsigned Index = getInstructionIndex(MI); + LiveIndex UseIdx) const { + LiveIndex Index = getInstructionIndex(MI); VNInfo *ValNo = li.FindLiveRangeContaining(Index)->valno; LiveInterval::const_iterator UI = li.FindLiveRangeContaining(UseIdx); return UI != li.end() && UI->valno == ValNo; @@ -1099,102 +1406,19 @@ bool LiveIntervals::isReMaterializable(const LiveInterval &li, if (DisableReMat) return false; - if (MI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF) - return true; - - int FrameIdx = 0; - if (tii_->isLoadFromStackSlot(MI, FrameIdx) && - mf_->getFrameInfo()->isImmutableObjectIndex(FrameIdx)) - // FIXME: Let target specific isReallyTriviallyReMaterializable determines - // this but remember this is not safe to fold into a two-address - // instruction. - // This is a load from fixed stack slot. It can be rematerialized. - return true; - - // If the target-specific rules don't identify an instruction as - // being trivially rematerializable, use some target-independent - // rules. - if (!MI->getDesc().isRematerializable() || - !tii_->isTriviallyReMaterializable(MI)) { - if (!EnableAggressiveRemat) - return false; - - // If the instruction accesses memory but the memoperands have been lost, - // we can't analyze it. - const TargetInstrDesc &TID = MI->getDesc(); - if ((TID.mayLoad() || TID.mayStore()) && MI->memoperands_empty()) - return false; - - // Avoid instructions obviously unsafe for remat. - if (TID.hasUnmodeledSideEffects() || TID.isNotDuplicable()) - return false; - - // If the instruction accesses memory and the memory could be non-constant, - // assume the instruction is not rematerializable. 
- for (std::list<MachineMemOperand>::const_iterator - I = MI->memoperands_begin(), E = MI->memoperands_end(); I != E; ++I){ - const MachineMemOperand &MMO = *I; - if (MMO.isVolatile() || MMO.isStore()) - return false; - const Value *V = MMO.getValue(); - if (!V) - return false; - if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) { - if (!PSV->isConstant(mf_->getFrameInfo())) - return false; - } else if (!aa_->pointsToConstantMemory(V)) - return false; - } - - // If any of the registers accessed are non-constant, conservatively assume - // the instruction is not rematerializable. - unsigned ImpUse = 0; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); - if (MO.isReg()) { - unsigned Reg = MO.getReg(); - if (Reg == 0) - continue; - if (TargetRegisterInfo::isPhysicalRegister(Reg)) - return false; - - // Only allow one def, and that in the first operand. - if (MO.isDef() != (i == 0)) - return false; - - // Only allow constant-valued registers. - bool IsLiveIn = mri_->isLiveIn(Reg); - MachineRegisterInfo::def_iterator I = mri_->def_begin(Reg), - E = mri_->def_end(); - - // For the def, it should be the only def of that register. - if (MO.isDef() && (next(I) != E || IsLiveIn)) - return false; - - if (MO.isUse()) { - // Only allow one use other register use, as that's all the - // remat mechanisms support currently. - if (Reg != li.reg) { - if (ImpUse == 0) - ImpUse = Reg; - else if (Reg != ImpUse) - return false; - } - // For the use, there should be only one associated def. - if (I != E && (next(I) != E || IsLiveIn)) - return false; - } - } - } - } + if (!tii_->isTriviallyReMaterializable(MI, aa_)) + return false; + // Target-specific code can mark an instruction as being rematerializable + // if it has one virtual reg use, though it had better be something like + // a PIC base register which is likely to be live everywhere. 
unsigned ImpUse = getReMatImplicitUse(li, MI); if (ImpUse) { const LiveInterval &ImpLi = getInterval(ImpUse); for (MachineRegisterInfo::use_iterator ri = mri_->use_begin(li.reg), re = mri_->use_end(); ri != re; ++ri) { MachineInstr *UseMI = &*ri; - unsigned UseIdx = getInstructionIndex(UseMI); + LiveIndex UseIdx = getInstructionIndex(UseMI); if (li.FindLiveRangeContaining(UseIdx)->valno != ValNo) continue; if (!isValNoAvailableAt(ImpLi, MI, UseIdx)) @@ -1279,7 +1503,7 @@ static bool FilterFoldedOps(MachineInstr *MI, /// returns true. bool LiveIntervals::tryFoldMemoryOperand(MachineInstr* &MI, VirtRegMap &vrm, MachineInstr *DefMI, - unsigned InstrIdx, + LiveIndex InstrIdx, SmallVector<unsigned, 2> &Ops, bool isSS, int Slot, unsigned Reg) { // If it is an implicit def instruction, just delete it. @@ -1318,7 +1542,7 @@ bool LiveIntervals::tryFoldMemoryOperand(MachineInstr* &MI, vrm.transferRestorePts(MI, fmi); vrm.transferEmergencySpills(MI, fmi); mi2iMap_.erase(MI); - i2miMap_[InstrIdx /InstrSlots::NUM] = fmi; + i2miMap_[InstrIdx.getVecIndex()] = fmi; mi2iMap_[fmi] = InstrIdx; MI = MBB.insert(MBB.erase(MI), fmi); ++numFolds; @@ -1391,7 +1615,8 @@ void LiveIntervals::rewriteImplicitOps(const LiveInterval &li, /// for addIntervalsForSpills to rewrite uses / defs for the given live range. bool LiveIntervals:: rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI, - bool TrySplit, unsigned index, unsigned end, MachineInstr *MI, + bool TrySplit, LiveIndex index, LiveIndex end, + MachineInstr *MI, MachineInstr *ReMatOrigDefMI, MachineInstr *ReMatDefMI, unsigned Slot, int LdSlot, bool isLoad, bool isLoadSS, bool DefIsReMat, bool CanDelete, @@ -1422,8 +1647,8 @@ rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI, // If this is the rematerializable definition MI itself and // all of its uses are rematerialized, simply delete it. 
if (MI == ReMatOrigDefMI && CanDelete) { - DOUT << "\t\t\t\tErasing re-materlizable def: "; - DOUT << MI << '\n'; + DEBUG(errs() << "\t\t\t\tErasing re-materlizable def: " + << MI << '\n'); RemoveMachineInstrFromMaps(MI); vrm.RemoveMachineInstrFromMaps(MI); MI->eraseFromParent(); @@ -1465,23 +1690,13 @@ rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI, continue; if (RegJ == RegI) { Ops.push_back(j); - HasUse |= MOj.isUse(); - HasDef |= MOj.isDef(); + if (!MOj.isUndef()) { + HasUse |= MOj.isUse(); + HasDef |= MOj.isDef(); + } } } - if (HasUse && !li.liveAt(getUseIndex(index))) - // Must be defined by an implicit def. It should not be spilled. Note, - // this is for correctness reason. e.g. - // 8 %reg1024<def> = IMPLICIT_DEF - // 12 %reg1024<def> = INSERT_SUBREG %reg1024<kill>, %reg1025, 2 - // The live range [12, 14) are not part of the r1024 live interval since - // it's defined by an implicit def. It will not conflicts with live - // interval of r1025. Now suppose both registers are spilled, you can - // easily see a situation where both registers are reloaded before - // the INSERT_SUBREG and both target registers that would overlap. - HasUse = false; - // Create a new virtual register for the spill interval. // Create the new register now so we can map the fold instruction // to the new register so when it is unfolded we get the correct @@ -1537,7 +1752,7 @@ rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI, if (CreatedNewVReg) { if (DefIsReMat) { - vrm.setVirtIsReMaterialized(NewVReg, ReMatDefMI/*, CanDelete*/); + vrm.setVirtIsReMaterialized(NewVReg, ReMatDefMI); if (ReMatIds[VNI->id] == VirtRegMap::MAX_STACK_SLOT) { // Each valnum may have its own remat id. 
ReMatIds[VNI->id] = vrm.assignVirtReMatId(NewVReg); @@ -1577,38 +1792,46 @@ rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI, if (HasUse) { if (CreatedNewVReg) { - LiveRange LR(getLoadIndex(index), getUseIndex(index)+1, - nI.getNextValue(0, 0, false, VNInfoAllocator)); - DOUT << " +" << LR; + LiveRange LR(getLoadIndex(index), getNextSlot(getUseIndex(index)), + nI.getNextValue(LiveIndex(), 0, false, + VNInfoAllocator)); + DEBUG(errs() << " +" << LR); nI.addRange(LR); } else { // Extend the split live interval to this def / use. - unsigned End = getUseIndex(index)+1; + LiveIndex End = getNextSlot(getUseIndex(index)); LiveRange LR(nI.ranges[nI.ranges.size()-1].end, End, nI.getValNumInfo(nI.getNumValNums()-1)); - DOUT << " +" << LR; + DEBUG(errs() << " +" << LR); nI.addRange(LR); } } if (HasDef) { LiveRange LR(getDefIndex(index), getStoreIndex(index), - nI.getNextValue(0, 0, false, VNInfoAllocator)); - DOUT << " +" << LR; + nI.getNextValue(LiveIndex(), 0, false, + VNInfoAllocator)); + DEBUG(errs() << " +" << LR); nI.addRange(LR); } - DOUT << "\t\t\t\tAdded new interval: "; - nI.print(DOUT, tri_); - DOUT << '\n'; + DEBUG({ + errs() << "\t\t\t\tAdded new interval: "; + nI.print(errs(), tri_); + errs() << '\n'; + }); } return CanFold; } bool LiveIntervals::anyKillInMBBAfterIdx(const LiveInterval &li, const VNInfo *VNI, - MachineBasicBlock *MBB, unsigned Idx) const { - unsigned End = getMBBEndIdx(MBB); + MachineBasicBlock *MBB, + LiveIndex Idx) const { + LiveIndex End = getMBBEndIdx(MBB); for (unsigned j = 0, ee = VNI->kills.size(); j != ee; ++j) { - unsigned KillIdx = VNI->kills[j]; + if (VNI->kills[j].isPHIIndex()) + continue; + + LiveIndex KillIdx = VNI->kills[j]; if (KillIdx > Idx && KillIdx < End) return true; } @@ -1619,11 +1842,11 @@ bool LiveIntervals::anyKillInMBBAfterIdx(const LiveInterval &li, /// during spilling. 
namespace { struct RewriteInfo { - unsigned Index; + LiveIndex Index; MachineInstr *MI; bool HasUse; bool HasDef; - RewriteInfo(unsigned i, MachineInstr *mi, bool u, bool d) + RewriteInfo(LiveIndex i, MachineInstr *mi, bool u, bool d) : Index(i), MI(mi), HasUse(u), HasDef(d) {} }; @@ -1652,8 +1875,8 @@ rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit, std::vector<LiveInterval*> &NewLIs) { bool AllCanFold = true; unsigned NewVReg = 0; - unsigned start = getBaseIndex(I->start); - unsigned end = getBaseIndex(I->end-1) + InstrSlots::NUM; + LiveIndex start = getBaseIndex(I->start); + LiveIndex end = getNextIndex(getBaseIndex(getPrevSlot(I->end))); // First collect all the def / use in this live range that will be rewritten. // Make sure they are sorted according to instruction index. @@ -1664,10 +1887,11 @@ rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit, MachineOperand &O = ri.getOperand(); ++ri; assert(!O.isImplicit() && "Spilling register that's used as implicit use?"); - unsigned index = getInstructionIndex(MI); + LiveIndex index = getInstructionIndex(MI); if (index < start || index >= end) continue; - if (O.isUse() && !li.liveAt(getUseIndex(index))) + + if (O.isUndef()) // Must be defined by an implicit def. It should not be spilled. Note, // this is for correctness reason. e.g. // 8 %reg1024<def> = IMPLICIT_DEF @@ -1687,7 +1911,7 @@ rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit, for (unsigned i = 0, e = RewriteMIs.size(); i != e; ) { RewriteInfo &rwi = RewriteMIs[i]; ++i; - unsigned index = rwi.Index; + LiveIndex index = rwi.Index; bool MIHasUse = rwi.HasUse; bool MIHasDef = rwi.HasDef; MachineInstr *MI = rwi.MI; @@ -1773,7 +1997,7 @@ rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit, HasKill = anyKillInMBBAfterIdx(li, I->valno, MBB, getDefIndex(index)); else { // If this is a two-address code, then this index starts a new VNInfo. 
- const VNInfo *VNI = li.findDefinedVNInfo(getDefIndex(index)); + const VNInfo *VNI = li.findDefinedVNInfoForRegInt(getDefIndex(index)); if (VNI) HasKill = anyKillInMBBAfterIdx(li, VNI, MBB, getDefIndex(index)); } @@ -1786,7 +2010,7 @@ rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit, SpillIdxes.insert(std::make_pair(MBBId, S)); } else if (SII->second.back().vreg != NewVReg) { SII->second.push_back(SRInfo(index, NewVReg, true)); - } else if ((int)index > SII->second.back().index) { + } else if (index > SII->second.back().index) { // If there is an earlier def and this is a two-address // instruction, then it's not possible to fold the store (which // would also fold the load). @@ -1797,7 +2021,7 @@ rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit, SpillMBBs.set(MBBId); } else if (SII != SpillIdxes.end() && SII->second.back().vreg == NewVReg && - (int)index > SII->second.back().index) { + index > SII->second.back().index) { // There is an earlier def that's not killed (must be two-address). // The spill is no longer needed. SII->second.pop_back(); @@ -1814,7 +2038,7 @@ rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit, SpillIdxes.find(MBBId); if (SII != SpillIdxes.end() && SII->second.back().vreg == NewVReg && - (int)index > SII->second.back().index) + index > SII->second.back().index) // Use(s) following the last def, it's not safe to fold the spill. 
SII->second.back().canFold = false; DenseMap<unsigned, std::vector<SRInfo> >::iterator RII = @@ -1848,8 +2072,8 @@ rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit, } } -bool LiveIntervals::alsoFoldARestore(int Id, int index, unsigned vr, - BitVector &RestoreMBBs, +bool LiveIntervals::alsoFoldARestore(int Id, LiveIndex index, + unsigned vr, BitVector &RestoreMBBs, DenseMap<unsigned,std::vector<SRInfo> > &RestoreIdxes) { if (!RestoreMBBs[Id]) return false; @@ -1862,15 +2086,15 @@ bool LiveIntervals::alsoFoldARestore(int Id, int index, unsigned vr, return false; } -void LiveIntervals::eraseRestoreInfo(int Id, int index, unsigned vr, - BitVector &RestoreMBBs, +void LiveIntervals::eraseRestoreInfo(int Id, LiveIndex index, + unsigned vr, BitVector &RestoreMBBs, DenseMap<unsigned,std::vector<SRInfo> > &RestoreIdxes) { if (!RestoreMBBs[Id]) return; std::vector<SRInfo> &Restores = RestoreIdxes[Id]; for (unsigned i = 0, e = Restores.size(); i != e; ++i) if (Restores[i].index == index && Restores[i].vreg) - Restores[i].index = -1; + Restores[i].index = LiveIndex(); } /// handleSpilledImpDefs - Remove IMPLICIT_DEF instructions which are being @@ -1920,9 +2144,11 @@ addIntervalsForSpillsFast(const LiveInterval &li, assert(li.weight != HUGE_VALF && "attempt to spill already spilled interval!"); - DOUT << "\t\t\t\tadding intervals for spills for interval: "; - DEBUG(li.dump()); - DOUT << '\n'; + DEBUG({ + errs() << "\t\t\t\tadding intervals for spills for interval: "; + li.dump(); + errs() << '\n'; + }); const TargetRegisterClass* rc = mri_->getRegClass(li.reg); @@ -1967,27 +2193,31 @@ addIntervalsForSpillsFast(const LiveInterval &li, } // Fill in the new live interval. 
- unsigned index = getInstructionIndex(MI); + LiveIndex index = getInstructionIndex(MI); if (HasUse) { LiveRange LR(getLoadIndex(index), getUseIndex(index), - nI.getNextValue(0, 0, false, getVNInfoAllocator())); - DOUT << " +" << LR; + nI.getNextValue(LiveIndex(), 0, false, + getVNInfoAllocator())); + DEBUG(errs() << " +" << LR); nI.addRange(LR); vrm.addRestorePoint(NewVReg, MI); } if (HasDef) { LiveRange LR(getDefIndex(index), getStoreIndex(index), - nI.getNextValue(0, 0, false, getVNInfoAllocator())); - DOUT << " +" << LR; + nI.getNextValue(LiveIndex(), 0, false, + getVNInfoAllocator())); + DEBUG(errs() << " +" << LR); nI.addRange(LR); vrm.addSpillPoint(NewVReg, true, MI); } added.push_back(&nI); - DOUT << "\t\t\t\tadded new interval: "; - DEBUG(nI.dump()); - DOUT << '\n'; + DEBUG({ + errs() << "\t\t\t\tadded new interval: "; + nI.dump(); + errs() << '\n'; + }); } @@ -2008,9 +2238,11 @@ addIntervalsForSpills(const LiveInterval &li, assert(li.weight != HUGE_VALF && "attempt to spill already spilled interval!"); - DOUT << "\t\t\t\tadding intervals for spills for interval: "; - li.print(DOUT, tri_); - DOUT << '\n'; + DEBUG({ + errs() << "\t\t\t\tadding intervals for spills for interval: "; + li.print(errs(), tri_); + errs() << '\n'; + }); // Each bit specify whether a spill is required in the MBB. BitVector SpillMBBs(mf_->getNumBlockIDs()); @@ -2036,8 +2268,8 @@ addIntervalsForSpills(const LiveInterval &li, if (vrm.getPreSplitReg(li.reg)) { vrm.setIsSplitFromReg(li.reg, 0); // Unset the split kill marker on the last use. 
- unsigned KillIdx = vrm.getKillPoint(li.reg); - if (KillIdx) { + LiveIndex KillIdx = vrm.getKillPoint(li.reg); + if (KillIdx != LiveIndex()) { MachineInstr *KillMI = getInstructionFromIndex(KillIdx); assert(KillMI && "Last use disappeared?"); int KillOp = KillMI->findRegisterUseOperandIdx(li.reg, true); @@ -2081,9 +2313,7 @@ addIntervalsForSpills(const LiveInterval &li, return NewLIs; } - bool TrySplit = SplitAtBB && !intervalIsInOneMBB(li); - if (SplitLimit != -1 && (int)numSplits >= SplitLimit) - TrySplit = false; + bool TrySplit = !intervalIsInOneMBB(li); if (TrySplit) ++numSplits; bool NeedStackSlot = false; @@ -2102,7 +2332,7 @@ addIntervalsForSpills(const LiveInterval &li, ReMatOrigDefs[VN] = ReMatDefMI; // Original def may be modified so we have to make a copy here. MachineInstr *Clone = mf_->CloneMachineInstr(ReMatDefMI); - ClonedMIs.push_back(Clone); + CloneMIs.push_back(Clone); ReMatDefs[VN] = Clone; bool CanDelete = true; @@ -2165,7 +2395,7 @@ addIntervalsForSpills(const LiveInterval &li, while (Id != -1) { std::vector<SRInfo> &spills = SpillIdxes[Id]; for (unsigned i = 0, e = spills.size(); i != e; ++i) { - int index = spills[i].index; + LiveIndex index = spills[i].index; unsigned VReg = spills[i].vreg; LiveInterval &nI = getOrCreateInterval(VReg); bool isReMat = vrm.isReMaterialized(VReg); @@ -2203,7 +2433,7 @@ addIntervalsForSpills(const LiveInterval &li, if (FoundUse) { // Also folded uses, do not issue a load. 
eraseRestoreInfo(Id, index, VReg, RestoreMBBs, RestoreIdxes); - nI.removeRange(getLoadIndex(index), getUseIndex(index)+1); + nI.removeRange(getLoadIndex(index), getNextSlot(getUseIndex(index))); } nI.removeRange(getDefIndex(index), getStoreIndex(index)); } @@ -2228,8 +2458,8 @@ addIntervalsForSpills(const LiveInterval &li, while (Id != -1) { std::vector<SRInfo> &restores = RestoreIdxes[Id]; for (unsigned i = 0, e = restores.size(); i != e; ++i) { - int index = restores[i].index; - if (index == -1) + LiveIndex index = restores[i].index; + if (index == LiveIndex()) continue; unsigned VReg = restores[i].vreg; LiveInterval &nI = getOrCreateInterval(VReg); @@ -2284,7 +2514,7 @@ addIntervalsForSpills(const LiveInterval &li, // If folding is not possible / failed, then tell the spiller to issue a // load / rematerialization for us. if (Folded) - nI.removeRange(getLoadIndex(index), getUseIndex(index)+1); + nI.removeRange(getLoadIndex(index), getNextSlot(getUseIndex(index))); else vrm.addRestorePoint(VReg, MI); } @@ -2300,7 +2530,7 @@ addIntervalsForSpills(const LiveInterval &li, LI->weight /= InstrSlots::NUM * getApproximateInstructionCount(*LI); if (!AddedKill.count(LI)) { LiveRange *LR = &LI->ranges[LI->ranges.size()-1]; - unsigned LastUseIdx = getBaseIndex(LR->end); + LiveIndex LastUseIdx = getBaseIndex(LR->end); MachineInstr *LastUse = getInstructionFromIndex(LastUseIdx); int UseIdx = LastUse->findRegisterUseOperandIdx(LI->reg, false); assert(UseIdx != -1); @@ -2351,7 +2581,7 @@ unsigned LiveIntervals::getNumConflictsWithPhysReg(const LiveInterval &li, E = mri_->reg_end(); I != E; ++I) { MachineOperand &O = I.getOperand(); MachineInstr *MI = O.getParent(); - unsigned Index = getInstructionIndex(MI); + LiveIndex Index = getInstructionIndex(MI); if (pli.liveAt(Index)) ++NumConflicts; } @@ -2382,29 +2612,31 @@ bool LiveIntervals::spillPhysRegAroundRegDefsUses(const LiveInterval &li, if (SeenMIs.count(MI)) continue; SeenMIs.insert(MI); - unsigned Index = 
getInstructionIndex(MI); + LiveIndex Index = getInstructionIndex(MI); if (pli.liveAt(Index)) { vrm.addEmergencySpill(SpillReg, MI); - unsigned StartIdx = getLoadIndex(Index); - unsigned EndIdx = getStoreIndex(Index)+1; + LiveIndex StartIdx = getLoadIndex(Index); + LiveIndex EndIdx = getNextSlot(getStoreIndex(Index)); if (pli.isInOneLiveRange(StartIdx, EndIdx)) { pli.removeRange(StartIdx, EndIdx); Cut = true; } else { - cerr << "Ran out of registers during register allocation!\n"; + std::string msg; + raw_string_ostream Msg(msg); + Msg << "Ran out of registers during register allocation!"; if (MI->getOpcode() == TargetInstrInfo::INLINEASM) { - cerr << "Please check your inline asm statement for invalid " + Msg << "\nPlease check your inline asm statement for invalid " << "constraints:\n"; - MI->print(cerr.stream(), tm_); + MI->print(Msg, tm_); } - exit(1); + llvm_report_error(Msg.str()); } for (const unsigned* AS = tri_->getSubRegisters(SpillReg); *AS; ++AS) { if (!hasInterval(*AS)) continue; LiveInterval &spli = getInterval(*AS); if (spli.liveAt(Index)) - spli.removeRange(getLoadIndex(Index), getStoreIndex(Index)+1); + spli.removeRange(getLoadIndex(Index), getNextSlot(getStoreIndex(Index))); } } } @@ -2412,16 +2644,18 @@ bool LiveIntervals::spillPhysRegAroundRegDefsUses(const LiveInterval &li, } LiveRange LiveIntervals::addLiveRangeToEndOfBlock(unsigned reg, - MachineInstr* startInst) { + MachineInstr* startInst) { LiveInterval& Interval = getOrCreateInterval(reg); VNInfo* VN = Interval.getNextValue( - getInstructionIndex(startInst) + InstrSlots::DEF, - startInst, true, getVNInfoAllocator()); + LiveIndex(getInstructionIndex(startInst), LiveIndex::DEF), + startInst, true, getVNInfoAllocator()); VN->setHasPHIKill(true); - VN->kills.push_back(getMBBEndIdx(startInst->getParent())); - LiveRange LR(getInstructionIndex(startInst) + InstrSlots::DEF, - getMBBEndIdx(startInst->getParent()) + 1, VN); + VN->kills.push_back(terminatorGaps[startInst->getParent()]); + LiveRange 
LR( + LiveIndex(getInstructionIndex(startInst), LiveIndex::DEF), + getNextSlot(getMBBEndIdx(startInst->getParent())), VN); Interval.addRange(LR); return LR; } + diff --git a/lib/CodeGen/LiveStackAnalysis.cpp b/lib/CodeGen/LiveStackAnalysis.cpp index 86f7ea2..a7bea1f 100644 --- a/lib/CodeGen/LiveStackAnalysis.cpp +++ b/lib/CodeGen/LiveStackAnalysis.cpp @@ -19,6 +19,7 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/ADT/Statistic.h" #include <limits> using namespace llvm; @@ -52,15 +53,16 @@ bool LiveStacks::runOnMachineFunction(MachineFunction &) { } /// print - Implement the dump method. -void LiveStacks::print(std::ostream &O, const Module*) const { - O << "********** INTERVALS **********\n"; +void LiveStacks::print(raw_ostream &OS, const Module*) const { + + OS << "********** INTERVALS **********\n"; for (const_iterator I = begin(), E = end(); I != E; ++I) { - I->second.print(O); + I->second.print(OS); int Slot = I->first; const TargetRegisterClass *RC = getIntervalRegClass(Slot); if (RC) - O << " [" << RC->getName() << "]\n"; + OS << " [" << RC->getName() << "]\n"; else - O << " [Unknown]\n"; + OS << " [Unknown]\n"; } } diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp index bd84508..139e029 100644 --- a/lib/CodeGen/LiveVariables.cpp +++ b/lib/CodeGen/LiveVariables.cpp @@ -37,7 +37,6 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/Config/alloca.h" #include <algorithm> using namespace llvm; @@ -48,20 +47,21 @@ static RegisterPass<LiveVariables> X("livevars", "Live Variable Analysis"); void LiveVariables::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequiredID(UnreachableMachineBlockElimID); AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); } void LiveVariables::VarInfo::dump() const { - cerr << " Alive in blocks: "; + errs() << " Alive 
in blocks: "; for (SparseBitVector<>::iterator I = AliveBlocks.begin(), E = AliveBlocks.end(); I != E; ++I) - cerr << *I << ", "; - cerr << "\n Killed by:"; + errs() << *I << ", "; + errs() << "\n Killed by:"; if (Kills.empty()) - cerr << " No instructions.\n"; + errs() << " No instructions.\n"; else { for (unsigned i = 0, e = Kills.size(); i != e; ++i) - cerr << "\n #" << i << ": " << *Kills[i]; - cerr << "\n"; + errs() << "\n #" << i << ": " << *Kills[i]; + errs() << "\n"; } } @@ -180,9 +180,9 @@ void LiveVariables::HandleVirtRegDef(unsigned Reg, MachineInstr *MI) { } /// FindLastPartialDef - Return the last partial def of the specified register. -/// Also returns the sub-register that's defined. +/// Also returns the sub-registers that're defined by the instruction. MachineInstr *LiveVariables::FindLastPartialDef(unsigned Reg, - unsigned &PartDefReg) { + SmallSet<unsigned,4> &PartDefRegs) { unsigned LastDefReg = 0; unsigned LastDefDist = 0; MachineInstr *LastDef = NULL; @@ -198,7 +198,23 @@ MachineInstr *LiveVariables::FindLastPartialDef(unsigned Reg, LastDefDist = Dist; } } - PartDefReg = LastDefReg; + + if (!LastDef) + return 0; + + PartDefRegs.insert(LastDefReg); + for (unsigned i = 0, e = LastDef->getNumOperands(); i != e; ++i) { + MachineOperand &MO = LastDef->getOperand(i); + if (!MO.isReg() || !MO.isDef() || MO.getReg() == 0) + continue; + unsigned DefReg = MO.getReg(); + if (TRI->isSubRegister(Reg, DefReg)) { + PartDefRegs.insert(DefReg); + for (const unsigned *SubRegs = TRI->getSubRegisters(DefReg); + unsigned SubReg = *SubRegs; ++SubRegs) + PartDefRegs.insert(SubReg); + } + } return LastDef; } @@ -216,8 +232,8 @@ void LiveVariables::HandlePhysRegUse(unsigned Reg, MachineInstr *MI) { // ... // = EAX // All of the sub-registers must have been defined before the use of Reg! 
- unsigned PartDefReg = 0; - MachineInstr *LastPartialDef = FindLastPartialDef(Reg, PartDefReg); + SmallSet<unsigned, 4> PartDefRegs; + MachineInstr *LastPartialDef = FindLastPartialDef(Reg, PartDefRegs); // If LastPartialDef is NULL, it must be using a livein register. if (LastPartialDef) { LastPartialDef->addOperand(MachineOperand::CreateReg(Reg, true/*IsDef*/, @@ -228,7 +244,7 @@ void LiveVariables::HandlePhysRegUse(unsigned Reg, MachineInstr *MI) { unsigned SubReg = *SubRegs; ++SubRegs) { if (Processed.count(SubReg)) continue; - if (SubReg == PartDefReg || TRI->isSubRegister(PartDefReg, SubReg)) + if (PartDefRegs.count(SubReg)) continue; // This part of Reg was defined before the last partial def. It's killed // here. @@ -249,78 +265,13 @@ void LiveVariables::HandlePhysRegUse(unsigned Reg, MachineInstr *MI) { PhysRegUse[SubReg] = MI; } -/// hasRegisterUseBelow - Return true if the specified register is used after -/// the current instruction and before it's next definition. -bool LiveVariables::hasRegisterUseBelow(unsigned Reg, - MachineBasicBlock::iterator I, - MachineBasicBlock *MBB) { - if (I == MBB->end()) - return false; - - // First find out if there are any uses / defs below. - bool hasDistInfo = true; - unsigned CurDist = DistanceMap[I]; - SmallVector<MachineInstr*, 4> Uses; - SmallVector<MachineInstr*, 4> Defs; - for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(Reg), - RE = MRI->reg_end(); RI != RE; ++RI) { - MachineOperand &UDO = RI.getOperand(); - MachineInstr *UDMI = &*RI; - if (UDMI->getParent() != MBB) - continue; - DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(UDMI); - bool isBelow = false; - if (DI == DistanceMap.end()) { - // Must be below if it hasn't been assigned a distance yet. 
- isBelow = true; - hasDistInfo = false; - } else if (DI->second > CurDist) - isBelow = true; - if (isBelow) { - if (UDO.isUse()) - Uses.push_back(UDMI); - if (UDO.isDef()) - Defs.push_back(UDMI); - } - } - - if (Uses.empty()) - // No uses below. - return false; - else if (!Uses.empty() && Defs.empty()) - // There are uses below but no defs below. - return true; - // There are both uses and defs below. We need to know which comes first. - if (!hasDistInfo) { - // Complete DistanceMap for this MBB. This information is computed only - // once per MBB. - ++I; - ++CurDist; - for (MachineBasicBlock::iterator E = MBB->end(); I != E; ++I, ++CurDist) - DistanceMap.insert(std::make_pair(I, CurDist)); - } - - unsigned EarliestUse = DistanceMap[Uses[0]]; - for (unsigned i = 1, e = Uses.size(); i != e; ++i) { - unsigned Dist = DistanceMap[Uses[i]]; - if (Dist < EarliestUse) - EarliestUse = Dist; - } - for (unsigned i = 0, e = Defs.size(); i != e; ++i) { - unsigned Dist = DistanceMap[Defs[i]]; - if (Dist < EarliestUse) - // The register is defined before its first use below. - return false; - } - return true; -} - bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) { - if (!PhysRegUse[Reg] && !PhysRegDef[Reg]) + MachineInstr *LastDef = PhysRegDef[Reg]; + MachineInstr *LastUse = PhysRegUse[Reg]; + if (!LastDef && !LastUse) return false; - MachineInstr *LastRefOrPartRef = PhysRegUse[Reg] - ? PhysRegUse[Reg] : PhysRegDef[Reg]; + MachineInstr *LastRefOrPartRef = LastUse ? LastUse : LastDef; unsigned LastRefOrPartRefDist = DistanceMap[LastRefOrPartRef]; // The whole register is used. 
// AL = @@ -339,9 +290,22 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) { // AX<dead> = AL<imp-def> // = AL<kill> // AX = + MachineInstr *LastPartDef = 0; + unsigned LastPartDefDist = 0; SmallSet<unsigned, 8> PartUses; for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); unsigned SubReg = *SubRegs; ++SubRegs) { + MachineInstr *Def = PhysRegDef[SubReg]; + if (Def && Def != LastDef) { + // There was a def of this sub-register in between. This is a partial + // def, keep track of the last one. + unsigned Dist = DistanceMap[Def]; + if (Dist > LastPartDefDist) { + LastPartDefDist = Dist; + LastPartDef = Def; + } + continue; + } if (MachineInstr *Use = PhysRegUse[SubReg]) { PartUses.insert(SubReg); for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS) @@ -354,35 +318,47 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) { } } - if (LastRefOrPartRef == PhysRegDef[Reg] && LastRefOrPartRef != MI) - // If the last reference is the last def, then it's not used at all. - // That is, unless we are currently processing the last reference itself. - LastRefOrPartRef->addRegisterDead(Reg, TRI, true); - - // Partial uses. Mark register def dead and add implicit def of - // sub-registers which are used. - // EAX<dead> = op AL<imp-def> - // That is, EAX def is dead but AL def extends pass it. - // Enable this after live interval analysis is fixed to improve codegen! - else if (!PhysRegUse[Reg]) { + if (LastRefOrPartRef == PhysRegDef[Reg] && LastRefOrPartRef != MI) { + if (LastPartDef) + // The last partial def kills the register. + LastPartDef->addOperand(MachineOperand::CreateReg(Reg, false/*IsDef*/, + true/*IsImp*/, true/*IsKill*/)); + else + // If the last reference is the last def, then it's not used at all. + // That is, unless we are currently processing the last reference itself. + LastRefOrPartRef->addRegisterDead(Reg, TRI, true); + } else if (!PhysRegUse[Reg]) { + // Partial uses. 
Mark register def dead and add implicit def of + // sub-registers which are used. + // EAX<dead> = op AL<imp-def> + // That is, EAX def is dead but AL def extends pass it. PhysRegDef[Reg]->addRegisterDead(Reg, TRI, true); for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); unsigned SubReg = *SubRegs; ++SubRegs) { - if (PartUses.count(SubReg)) { - PhysRegDef[Reg]->addOperand(MachineOperand::CreateReg(SubReg, - true, true)); - LastRefOrPartRef->addRegisterKilled(SubReg, TRI, true); - for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS) - PartUses.erase(*SS); + if (!PartUses.count(SubReg)) + continue; + bool NeedDef = true; + if (PhysRegDef[Reg] == PhysRegDef[SubReg]) { + MachineOperand *MO = PhysRegDef[Reg]->findRegisterDefOperand(SubReg); + if (MO) { + NeedDef = false; + assert(!MO->isDead()); + } } + if (NeedDef) + PhysRegDef[Reg]->addOperand(MachineOperand::CreateReg(SubReg, + true/*IsDef*/, true/*IsImp*/)); + LastRefOrPartRef->addRegisterKilled(SubReg, TRI, true); + for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS) + PartUses.erase(*SS); } - } - else + } else LastRefOrPartRef->addRegisterKilled(Reg, TRI, true); return true; } -void LiveVariables::HandlePhysRegDef(unsigned Reg, MachineInstr *MI) { +void LiveVariables::HandlePhysRegDef(unsigned Reg, MachineInstr *MI, + SmallVector<unsigned, 4> &Defs) { // What parts of the register are previously defined? SmallSet<unsigned, 32> Live; if (PhysRegDef[Reg] || PhysRegUse[Reg]) { @@ -398,6 +374,8 @@ void LiveVariables::HandlePhysRegDef(unsigned Reg, MachineInstr *MI) { // AL = // AH = // = AX + if (Live.count(SubReg)) + continue; if (PhysRegDef[SubReg] || PhysRegUse[SubReg]) { Live.insert(SubReg); for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS) @@ -408,68 +386,25 @@ void LiveVariables::HandlePhysRegDef(unsigned Reg, MachineInstr *MI) { // Start from the largest piece, find the last time any part of the register // is referenced. 
- if (!HandlePhysRegKill(Reg, MI)) { - // Only some of the sub-registers are used. - for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); - unsigned SubReg = *SubRegs; ++SubRegs) { - if (!Live.count(SubReg)) - // Skip if this sub-register isn't defined. - continue; - if (HandlePhysRegKill(SubReg, MI)) { - Live.erase(SubReg); - for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS) - Live.erase(*SS); - } - } - assert(Live.empty() && "Not all defined registers are killed / dead?"); + HandlePhysRegKill(Reg, MI); + // Only some of the sub-registers are used. + for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); + unsigned SubReg = *SubRegs; ++SubRegs) { + if (!Live.count(SubReg)) + // Skip if this sub-register isn't defined. + continue; + HandlePhysRegKill(SubReg, MI); } - if (MI) { - // Does this extend the live range of a super-register? - SmallSet<unsigned, 8> Processed; - for (const unsigned *SuperRegs = TRI->getSuperRegisters(Reg); - unsigned SuperReg = *SuperRegs; ++SuperRegs) { - if (Processed.count(SuperReg)) - continue; - MachineInstr *LastRef = PhysRegUse[SuperReg] - ? PhysRegUse[SuperReg] : PhysRegDef[SuperReg]; - if (LastRef && LastRef != MI) { - // The larger register is previously defined. Now a smaller part is - // being re-defined. Treat it as read/mod/write if there are uses - // below. - // EAX = - // AX = EAX<imp-use,kill>, EAX<imp-def> - // ... - /// = EAX - if (hasRegisterUseBelow(SuperReg, MI, MI->getParent())) { - MI->addOperand(MachineOperand::CreateReg(SuperReg, false/*IsDef*/, - true/*IsImp*/,true/*IsKill*/)); - MI->addOperand(MachineOperand::CreateReg(SuperReg, true/*IsDef*/, - true/*IsImp*/)); - PhysRegDef[SuperReg] = MI; - PhysRegUse[SuperReg] = NULL; - Processed.insert(SuperReg); - for (const unsigned *SS = TRI->getSubRegisters(SuperReg); *SS; ++SS) { - PhysRegDef[*SS] = MI; - PhysRegUse[*SS] = NULL; - Processed.insert(*SS); - } - } else { - // Otherwise, the super register is killed. 
- if (HandlePhysRegKill(SuperReg, MI)) { - PhysRegDef[SuperReg] = NULL; - PhysRegUse[SuperReg] = NULL; - for (const unsigned *SS = TRI->getSubRegisters(SuperReg); *SS; ++SS) { - PhysRegDef[*SS] = NULL; - PhysRegUse[*SS] = NULL; - Processed.insert(*SS); - } - } - } - } - } + if (MI) + Defs.push_back(Reg); // Remember this def. +} - // Remember this def. +void LiveVariables::UpdatePhysRegDefs(MachineInstr *MI, + SmallVector<unsigned, 4> &Defs) { + while (!Defs.empty()) { + unsigned Reg = Defs.back(); + Defs.pop_back(); PhysRegDef[Reg] = MI; PhysRegUse[Reg] = NULL; for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); @@ -480,6 +415,21 @@ void LiveVariables::HandlePhysRegDef(unsigned Reg, MachineInstr *MI) { } } +namespace { + struct RegSorter { + const TargetRegisterInfo *TRI; + + RegSorter(const TargetRegisterInfo *tri) : TRI(tri) { } + bool operator()(unsigned A, unsigned B) { + if (TRI->isSubRegister(A, B)) + return true; + else if (TRI->isSubRegister(B, A)) + return false; + return A < B; + } + }; +} + bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { MF = &mf; MRI = &mf.getRegInfo(); @@ -512,11 +462,12 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { MachineBasicBlock *MBB = *DFI; // Mark live-in registers as live-in. + SmallVector<unsigned, 4> Defs; for (MachineBasicBlock::const_livein_iterator II = MBB->livein_begin(), EE = MBB->livein_end(); II != EE; ++II) { assert(TargetRegisterInfo::isPhysicalRegister(*II) && "Cannot have a live-in virtual register!"); - HandlePhysRegDef(*II, 0); + HandlePhysRegDef(*II, 0, Defs); } // Loop over all of the instructions, processing them. 
@@ -563,8 +514,9 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { if (TargetRegisterInfo::isVirtualRegister(MOReg)) HandleVirtRegDef(MOReg, MI); else if (!ReservedRegisters[MOReg]) - HandlePhysRegDef(MOReg, MI); + HandlePhysRegDef(MOReg, MI, Defs); } + UpdatePhysRegDefs(MI, Defs); } // Handle any virtual assignments from PHI nodes which might be at the @@ -603,7 +555,7 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { // available at the end of the basic block. for (unsigned i = 0; i != NumRegs; ++i) if (PhysRegDef[i] || PhysRegUse[i]) - HandlePhysRegDef(i, 0); + HandlePhysRegDef(i, 0, Defs); std::fill(PhysRegDef, PhysRegDef + NumRegs, (MachineInstr*)0); std::fill(PhysRegUse, PhysRegUse + NumRegs, (MachineInstr*)0); diff --git a/lib/CodeGen/LowerSubregs.cpp b/lib/CodeGen/LowerSubregs.cpp index 14acb71..8486bb0 100644 --- a/lib/CodeGen/LowerSubregs.cpp +++ b/lib/CodeGen/LowerSubregs.cpp @@ -19,12 +19,14 @@ #include "llvm/Function.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; namespace { @@ -38,6 +40,7 @@ namespace { } virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); AU.addPreservedID(MachineLoopInfoID); AU.addPreservedID(MachineDominatorsID); MachineFunctionPass::getAnalysisUsage(AU); @@ -53,7 +56,8 @@ namespace { void TransferDeadFlag(MachineInstr *MI, unsigned DstReg, const TargetRegisterInfo &TRI); void TransferKillFlag(MachineInstr *MI, unsigned SrcReg, - const TargetRegisterInfo &TRI); + const TargetRegisterInfo &TRI, + bool AddIfNotFound = false); }; char LowerSubregsInstructionPass::ID = 0; @@ -85,10 +89,11 @@ 
LowerSubregsInstructionPass::TransferDeadFlag(MachineInstr *MI, void LowerSubregsInstructionPass::TransferKillFlag(MachineInstr *MI, unsigned SrcReg, - const TargetRegisterInfo &TRI) { + const TargetRegisterInfo &TRI, + bool AddIfNotFound) { for (MachineBasicBlock::iterator MII = prior(MachineBasicBlock::iterator(MI)); ; --MII) { - if (MII->addRegisterKilled(SrcReg, &TRI)) + if (MII->addRegisterKilled(SrcReg, &TRI, AddIfNotFound)) break; assert(MII != MI->getParent()->begin() && "copyRegToReg output doesn't reference source register!"); @@ -100,7 +105,7 @@ bool LowerSubregsInstructionPass::LowerExtract(MachineInstr *MI) { MachineFunction &MF = *MBB->getParent(); const TargetRegisterInfo &TRI = *MF.getTarget().getRegisterInfo(); const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); - + assert(MI->getOperand(0).isReg() && MI->getOperand(0).isDef() && MI->getOperand(1).isReg() && MI->getOperand(1).isUse() && MI->getOperand(2).isImm() && "Malformed extract_subreg"); @@ -114,41 +119,41 @@ bool LowerSubregsInstructionPass::LowerExtract(MachineInstr *MI) { "Extract supperg source must be a physical register"); assert(TargetRegisterInfo::isPhysicalRegister(DstReg) && "Extract destination must be in a physical register"); - - DOUT << "subreg: CONVERTING: " << *MI; + assert(SrcReg && "invalid subregister index for register"); + + DEBUG(errs() << "subreg: CONVERTING: " << *MI); if (SrcReg == DstReg) { - // No need to insert an identify copy instruction. - DOUT << "subreg: eliminated!"; - // Find the kill of the destination register's live range, and insert - // a kill of the source register at that point. - if (MI->getOperand(1).isKill() && !MI->getOperand(0).isDead()) - for (MachineBasicBlock::iterator MII = - next(MachineBasicBlock::iterator(MI)); - MII != MBB->end(); ++MII) - if (MII->killsRegister(DstReg, &TRI)) { - MII->addRegisterKilled(SuperReg, &TRI, /*AddIfNotFound=*/true); - break; - } + // No need to insert an identity copy instruction. 
+ if (MI->getOperand(1).isKill()) { + // We must make sure the super-register gets killed. Replace the + // instruction with KILL. + MI->setDesc(TII.get(TargetInstrInfo::KILL)); + MI->RemoveOperand(2); // SubIdx + DEBUG(errs() << "subreg: replace by: " << *MI); + return true; + } + + DEBUG(errs() << "subreg: eliminated!"); } else { // Insert copy - const TargetRegisterClass *TRC = TRI.getPhysicalRegisterRegClass(DstReg); - assert(TRC == TRI.getPhysicalRegisterRegClass(SrcReg) && - "Extract subreg and Dst must be of same register class"); - TII.copyRegToReg(*MBB, MI, DstReg, SrcReg, TRC, TRC); + const TargetRegisterClass *TRCS = TRI.getPhysicalRegisterRegClass(DstReg); + const TargetRegisterClass *TRCD = TRI.getPhysicalRegisterRegClass(SrcReg); + bool Emitted = TII.copyRegToReg(*MBB, MI, DstReg, SrcReg, TRCD, TRCS); + (void)Emitted; + assert(Emitted && "Subreg and Dst must be of compatible register class"); // Transfer the kill/dead flags, if needed. if (MI->getOperand(0).isDead()) TransferDeadFlag(MI, DstReg, TRI); if (MI->getOperand(1).isKill()) - TransferKillFlag(MI, SrcReg, TRI); - -#ifndef NDEBUG - MachineBasicBlock::iterator dMI = MI; - DOUT << "subreg: " << *(--dMI); -#endif + TransferKillFlag(MI, SuperReg, TRI, true); + DEBUG({ + MachineBasicBlock::iterator dMI = MI; + errs() << "subreg: " << *(--dMI); + }); } - DOUT << "\n"; + DEBUG(errs() << '\n'); MBB->erase(MI); return true; } @@ -176,7 +181,7 @@ bool LowerSubregsInstructionPass::LowerSubregToReg(MachineInstr *MI) { assert(TargetRegisterInfo::isPhysicalRegister(InsReg) && "Inserted value must be in a physical register"); - DOUT << "subreg: CONVERTING: " << *MI; + DEBUG(errs() << "subreg: CONVERTING: " << *MI); if (DstSubReg == InsReg && InsSIdx == 0) { // No need to insert an identify copy instruction. 
@@ -185,7 +190,7 @@ bool LowerSubregsInstructionPass::LowerSubregToReg(MachineInstr *MI) { // %RAX<def> = SUBREG_TO_REG 0, %EAX:3<kill>, 3 // The first def is defining RAX, not EAX so the top bits were not // zero extended. - DOUT << "subreg: eliminated!"; + DEBUG(errs() << "subreg: eliminated!"); } else { // Insert sub-register copy const TargetRegisterClass *TRC0= TRI.getPhysicalRegisterRegClass(DstSubReg); @@ -196,14 +201,13 @@ bool LowerSubregsInstructionPass::LowerSubregToReg(MachineInstr *MI) { TransferDeadFlag(MI, DstSubReg, TRI); if (MI->getOperand(2).isKill()) TransferKillFlag(MI, InsReg, TRI); - -#ifndef NDEBUG - MachineBasicBlock::iterator dMI = MI; - DOUT << "subreg: " << *(--dMI); -#endif + DEBUG({ + MachineBasicBlock::iterator dMI = MI; + errs() << "subreg: " << *(--dMI); + }); } - DOUT << "\n"; + DEBUG(errs() << '\n'); MBB->erase(MI); return true; } @@ -228,49 +232,79 @@ bool LowerSubregsInstructionPass::LowerInsert(MachineInstr *MI) { assert(DstReg == SrcReg && "insert_subreg not a two-address instruction?"); assert(SubIdx != 0 && "Invalid index for insert_subreg"); unsigned DstSubReg = TRI.getSubReg(DstReg, SubIdx); - + assert(DstSubReg && "invalid subregister index for register"); assert(TargetRegisterInfo::isPhysicalRegister(SrcReg) && "Insert superreg source must be in a physical register"); assert(TargetRegisterInfo::isPhysicalRegister(InsReg) && "Inserted value must be in a physical register"); - DOUT << "subreg: CONVERTING: " << *MI; + DEBUG(errs() << "subreg: CONVERTING: " << *MI); if (DstSubReg == InsReg) { - // No need to insert an identify copy instruction. - DOUT << "subreg: eliminated!"; + // No need to insert an identity copy instruction. 
If the SrcReg was + // <undef>, we need to make sure it is alive by inserting a KILL + if (MI->getOperand(1).isUndef() && !MI->getOperand(0).isDead()) { + MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), + TII.get(TargetInstrInfo::KILL), DstReg); + if (MI->getOperand(2).isUndef()) + MIB.addReg(InsReg, RegState::Undef); + else + MIB.addReg(InsReg, RegState::Kill); + } else { + DEBUG(errs() << "subreg: eliminated!\n"); + MBB->erase(MI); + return true; + } } else { // Insert sub-register copy const TargetRegisterClass *TRC0= TRI.getPhysicalRegisterRegClass(DstSubReg); const TargetRegisterClass *TRC1= TRI.getPhysicalRegisterRegClass(InsReg); - TII.copyRegToReg(*MBB, MI, DstSubReg, InsReg, TRC0, TRC1); + if (MI->getOperand(2).isUndef()) + // If the source register being inserted is undef, then this becomes a + // KILL. + BuildMI(*MBB, MI, MI->getDebugLoc(), + TII.get(TargetInstrInfo::KILL), DstSubReg); + else + TII.copyRegToReg(*MBB, MI, DstSubReg, InsReg, TRC0, TRC1); + MachineBasicBlock::iterator CopyMI = MI; + --CopyMI; + + // INSERT_SUBREG is a two-address instruction so it implicitly kills SrcReg. + if (!MI->getOperand(1).isUndef()) + CopyMI->addOperand(MachineOperand::CreateReg(DstReg, false, true, true)); + // Transfer the kill/dead flags, if needed. - if (MI->getOperand(0).isDead()) + if (MI->getOperand(0).isDead()) { TransferDeadFlag(MI, DstSubReg, TRI); - if (MI->getOperand(1).isKill()) - TransferKillFlag(MI, InsReg, TRI); + } else { + // Make sure the full DstReg is live after this replacement. 
+ CopyMI->addOperand(MachineOperand::CreateReg(DstReg, true, true)); + } -#ifndef NDEBUG - MachineBasicBlock::iterator dMI = MI; - DOUT << "subreg: " << *(--dMI); -#endif + // Make sure the inserted register gets killed + if (MI->getOperand(2).isKill() && !MI->getOperand(2).isUndef()) + TransferKillFlag(MI, InsReg, TRI); } - DOUT << "\n"; + DEBUG({ + MachineBasicBlock::iterator dMI = MI; + errs() << "subreg: " << *(--dMI) << "\n"; + }); + MBB->erase(MI); - return true; + return true; } /// runOnMachineFunction - Reduce subregister inserts and extracts to register /// copies. /// bool LowerSubregsInstructionPass::runOnMachineFunction(MachineFunction &MF) { - DOUT << "Machine Function\n"; - - bool MadeChange = false; + DEBUG(errs() << "Machine Function\n" + << "********** LOWERING SUBREG INSTRS **********\n" + << "********** Function: " + << MF.getFunction()->getName() << '\n'); - DOUT << "********** LOWERING SUBREG INSTRS **********\n"; - DOUT << "********** Function: " << MF.getFunction()->getName() << '\n'; + bool MadeChange = false; for (MachineFunction::iterator mbbi = MF.begin(), mbbe = MF.end(); mbbi != mbbe; ++mbbi) { diff --git a/lib/CodeGen/MachO.h b/lib/CodeGen/MachO.h index bd9bd61..f2b40fe 100644 --- a/lib/CodeGen/MachO.h +++ b/lib/CodeGen/MachO.h @@ -14,17 +14,15 @@ #ifndef MACHO_H #define MACHO_H -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/CodeGen/MachineRelocation.h" -#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/CodeGen/BinaryObject.h" #include <string> #include <vector> namespace llvm { -typedef std::vector<unsigned char> DataBuffer; - +class GlobalValue; +class MCAsmInfo; + /// MachOSym - This struct contains information about each symbol that is /// added to logical symbol table for the module. This is eventually /// turned into a real symbol table in the file. 
@@ -70,7 +68,7 @@ struct MachOSym { }; MachOSym(const GlobalValue *gv, std::string name, uint8_t sect, - const TargetAsmInfo *TAI); + const MCAsmInfo *MAI); struct SymCmp { // FIXME: this does not appear to be sorting 'f' after 'F' @@ -110,7 +108,7 @@ struct MachOHeader { /// HeaderData - The actual data for the header which we are building /// up for emission to the file. - DataBuffer HeaderData; + std::vector<unsigned char> HeaderData; // Constants for the filetype field // see <mach-o/loader.h> for additional info on the various types @@ -180,8 +178,8 @@ struct MachOHeader { }; MachOHeader() : magic(0), filetype(0), ncmds(0), sizeofcmds(0), flags(0), - reserved(0) { } - + reserved(0) {} + /// cmdSize - This routine returns the size of the MachOSection as written /// to disk, depending on whether the destination is a 64 bit Mach-O file. unsigned cmdSize(bool is64Bit) const { @@ -203,7 +201,7 @@ struct MachOHeader { } }; // end struct MachOHeader - + /// MachOSegment - This struct contains the necessary information to /// emit the load commands for each section in the file. struct MachOSegment { @@ -245,13 +243,13 @@ struct MachOSegment { SEG_VM_PROT_EXECUTE = VM_PROT_EXECUTE, SEG_VM_PROT_ALL = VM_PROT_ALL }; - + // Constants for the cmd field // see <mach-o/loader.h> enum { LC_SEGMENT = 0x01, // segment of this file to be mapped LC_SEGMENT_64 = 0x19 // 64-bit segment of this file to be mapped }; - + /// cmdSize - This routine returns the size of the MachOSection as written /// to disk, depending on whether the destination is a 64 bit Mach-O file. unsigned cmdSize(bool is64Bit) const { @@ -272,11 +270,10 @@ struct MachOSegment { /// turned into the SectionCommand in the load command for a particlar /// segment. 
-struct MachOSection { +struct MachOSection : public BinaryObject { std::string sectname; // name of this section, std::string segname; // segment this section goes in uint64_t addr; // memory address of this section - uint64_t size; // size in bytes of this section uint32_t offset; // file offset of this section uint32_t align; // section alignment (power of 2) uint32_t reloff; // file offset of relocation entries @@ -285,24 +282,15 @@ struct MachOSection { uint32_t reserved1; // reserved (for offset or index) uint32_t reserved2; // reserved (for count or sizeof) uint32_t reserved3; // reserved (64 bit only) - + /// A unique number for this section, which will be used to match symbols /// to the correct section. uint32_t Index; - - /// SectionData - The actual data for this section which we are building - /// up for emission to the file. - DataBuffer SectionData; /// RelocBuffer - A buffer to hold the mach-o relocations before we write /// them out at the appropriate location in the file. - DataBuffer RelocBuffer; - - /// Relocations - The relocations that we have encountered so far in this - /// section that we will need to convert to MachORelocation entries when - /// the file is written. - std::vector<MachineRelocation> Relocations; - + std::vector<unsigned char> RelocBuffer; + // Constants for the section types (low 8 bits of flags field) // see <mach-o/loader.h> enum { S_REGULAR = 0, @@ -374,48 +362,49 @@ struct MachOSection { } MachOSection(const std::string &seg, const std::string §) - : sectname(sect), segname(seg), addr(0), size(0), offset(0), align(2), - reloff(0), nreloc(0), flags(0), reserved1(0), reserved2(0), + : BinaryObject(), sectname(sect), segname(seg), addr(0), offset(0), + align(2), reloff(0), nreloc(0), flags(0), reserved1(0), reserved2(0), reserved3(0) { } }; // end struct MachOSection - /// MachOSymTab - This struct contains information about the offsets and - /// size of symbol table information. - /// segment. 
- struct MachODySymTab { - uint32_t cmd; // LC_DYSYMTAB - uint32_t cmdsize; // sizeof( MachODySymTab ) - uint32_t ilocalsym; // index to local symbols - uint32_t nlocalsym; // number of local symbols - uint32_t iextdefsym; // index to externally defined symbols - uint32_t nextdefsym; // number of externally defined symbols - uint32_t iundefsym; // index to undefined symbols - uint32_t nundefsym; // number of undefined symbols - uint32_t tocoff; // file offset to table of contents - uint32_t ntoc; // number of entries in table of contents - uint32_t modtaboff; // file offset to module table - uint32_t nmodtab; // number of module table entries - uint32_t extrefsymoff; // offset to referenced symbol table - uint32_t nextrefsyms; // number of referenced symbol table entries - uint32_t indirectsymoff; // file offset to the indirect symbol table - uint32_t nindirectsyms; // number of indirect symbol table entries - uint32_t extreloff; // offset to external relocation entries - uint32_t nextrel; // number of external relocation entries - uint32_t locreloff; // offset to local relocation entries - uint32_t nlocrel; // number of local relocation entries - - // Constants for the cmd field - // see <mach-o/loader.h> - enum { LC_DYSYMTAB = 0x0B // dynamic link-edit symbol table info - }; - - MachODySymTab() : cmd(LC_DYSYMTAB), cmdsize(20 * sizeof(uint32_t)), - ilocalsym(0), nlocalsym(0), iextdefsym(0), nextdefsym(0), - iundefsym(0), nundefsym(0), tocoff(0), ntoc(0), modtaboff(0), - nmodtab(0), extrefsymoff(0), nextrefsyms(0), indirectsymoff(0), - nindirectsyms(0), extreloff(0), nextrel(0), locreloff(0), nlocrel(0) { } - }; +/// MachOSymTab - This struct contains information about the offsets and +/// size of symbol table information. +/// segment. 
+struct MachODySymTab { + uint32_t cmd; // LC_DYSYMTAB + uint32_t cmdsize; // sizeof(MachODySymTab) + uint32_t ilocalsym; // index to local symbols + uint32_t nlocalsym; // number of local symbols + uint32_t iextdefsym; // index to externally defined symbols + uint32_t nextdefsym; // number of externally defined symbols + uint32_t iundefsym; // index to undefined symbols + uint32_t nundefsym; // number of undefined symbols + uint32_t tocoff; // file offset to table of contents + uint32_t ntoc; // number of entries in table of contents + uint32_t modtaboff; // file offset to module table + uint32_t nmodtab; // number of module table entries + uint32_t extrefsymoff; // offset to referenced symbol table + uint32_t nextrefsyms; // number of referenced symbol table entries + uint32_t indirectsymoff; // file offset to the indirect symbol table + uint32_t nindirectsyms; // number of indirect symbol table entries + uint32_t extreloff; // offset to external relocation entries + uint32_t nextrel; // number of external relocation entries + uint32_t locreloff; // offset to local relocation entries + uint32_t nlocrel; // number of local relocation entries + + // Constants for the cmd field + // see <mach-o/loader.h> + enum { LC_DYSYMTAB = 0x0B // dynamic link-edit symbol table info + }; + + MachODySymTab() : cmd(LC_DYSYMTAB), cmdsize(20 * sizeof(uint32_t)), + ilocalsym(0), nlocalsym(0), iextdefsym(0), nextdefsym(0), + iundefsym(0), nundefsym(0), tocoff(0), ntoc(0), modtaboff(0), + nmodtab(0), extrefsymoff(0), nextrefsyms(0), indirectsymoff(0), + nindirectsyms(0), extreloff(0), nextrel(0), locreloff(0), nlocrel(0) {} + +}; // end struct MachODySymTab } // end namespace llvm diff --git a/lib/CodeGen/MachOCodeEmitter.cpp b/lib/CodeGen/MachOCodeEmitter.cpp index 02b02de..1318477 100644 --- a/lib/CodeGen/MachOCodeEmitter.cpp +++ b/lib/CodeGen/MachOCodeEmitter.cpp @@ -7,22 +7,37 @@ // //===----------------------------------------------------------------------===// +#include "MachO.h" 
+#include "MachOWriter.h" #include "MachOCodeEmitter.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" #include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" -#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/CodeGen/MachineRelocation.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Mangler.h" #include "llvm/Support/OutputBuffer.h" +#include <vector> //===----------------------------------------------------------------------===// // MachOCodeEmitter Implementation //===----------------------------------------------------------------------===// namespace llvm { - + +MachOCodeEmitter::MachOCodeEmitter(MachOWriter &mow, MachOSection &mos) : + ObjectCodeEmitter(&mos), MOW(mow), TM(MOW.TM) { + is64Bit = TM.getTargetData()->getPointerSizeInBits() == 64; + isLittleEndian = TM.getTargetData()->isLittleEndian(); + MAI = TM.getMCAsmInfo(); +} + /// startFunction - This callback is invoked when a new machine function is /// about to be emitted. @@ -39,28 +54,18 @@ void MachOCodeEmitter::startFunction(MachineFunction &MF) { // Get the Mach-O Section that this function belongs in. MachOSection *MOS = MOW.getTextSection(); - // FIXME: better memory management - MOS->SectionData.reserve(4096); - BufferBegin = &MOS->SectionData[0]; - BufferEnd = BufferBegin + MOS->SectionData.capacity(); - // Upgrade the section alignment if required. if (MOS->align < Align) MOS->align = Align; - // Round the size up to the correct alignment for starting the new function. 
- if ((MOS->size & ((1 << Align) - 1)) != 0) { - MOS->size += (1 << Align); - MOS->size &= ~((1 << Align) - 1); - } + MOS->emitAlignment(Align); + + // Create symbol for function entry + const GlobalValue *FuncV = MF.getFunction(); + MachOSym FnSym(FuncV, MOW.Mang->getMangledName(FuncV), MOS->Index, MAI); + FnSym.n_value = getCurrentPCOffset(); - // FIXME: Using MOS->size directly here instead of calculating it from the - // output buffer size (impossible because the code emitter deals only in raw - // bytes) forces us to manually synchronize size and write padding zero bytes - // to the output buffer for all non-text sections. For text sections, we do - // not synchonize the output buffer, and we just blow up if anyone tries to - // write non-code to it. An assert should probably be added to - // AddSymbolToSection to prevent calling it on the text section. - CurBufferPtr = BufferBegin + MOS->size; + // add it to the symtab. + MOW.SymbolTable.push_back(FnSym); } /// finishFunction - This callback is invoked after the function is completely @@ -71,15 +76,6 @@ bool MachOCodeEmitter::finishFunction(MachineFunction &MF) { // Get the Mach-O Section that this function belongs in. MachOSection *MOS = MOW.getTextSection(); - // Get a symbol for the function to add to the symbol table - // FIXME: it seems like we should call something like AddSymbolToSection - // in startFunction rather than changing the section size and symbol n_value - // here. 
- const GlobalValue *FuncV = MF.getFunction(); - MachOSym FnSym(FuncV, MOW.Mang->getValueName(FuncV), MOS->Index, TAI); - FnSym.n_value = MOS->size; - MOS->size = CurBufferPtr - BufferBegin; - // Emit constant pool to appropriate section(s) emitConstantPool(MF.getConstantPool()); @@ -110,14 +106,11 @@ bool MachOCodeEmitter::finishFunction(MachineFunction &MF) { // FIXME: This should be a set or something that uniques MOW.PendingGlobals.push_back(MR.getGlobalValue()); } else { - assert(0 && "Unhandled relocation type"); + llvm_unreachable("Unhandled relocation type"); } - MOS->Relocations.push_back(MR); + MOS->addRelocation(MR); } Relocations.clear(); - - // Finally, add it to the symtab. - MOW.SymbolTable.push_back(FnSym); // Clear per-function data structures. CPLocations.clear(); @@ -151,13 +144,10 @@ void MachOCodeEmitter::emitConstantPool(MachineConstantPool *MCP) { unsigned Size = TM.getTargetData()->getTypeAllocSize(Ty); MachOSection *Sec = MOW.getConstSection(CP[i].Val.ConstVal); - OutputBuffer SecDataOut(Sec->SectionData, is64Bit, isLittleEndian); + OutputBuffer SecDataOut(Sec->getData(), is64Bit, isLittleEndian); - CPLocations.push_back(Sec->SectionData.size()); + CPLocations.push_back(Sec->size()); CPSections.push_back(Sec->Index); - - // FIXME: remove when we have unified size + output buffer - Sec->size += Size; // Allocate space in the section for the global. // FIXME: need alignment? @@ -165,14 +155,13 @@ void MachOCodeEmitter::emitConstantPool(MachineConstantPool *MCP) { for (unsigned j = 0; j < Size; ++j) SecDataOut.outbyte(0); - MOW.InitMem(CP[i].Val.ConstVal, &Sec->SectionData[0], CPLocations[i], - TM.getTargetData(), Sec->Relocations); + MachOWriter::InitMem(CP[i].Val.ConstVal, CPLocations[i], + TM.getTargetData(), Sec); } } /// emitJumpTables - Emit all the jump tables for a given jump table info /// record to the appropriate section. 
- void MachOCodeEmitter::emitJumpTables(MachineJumpTableInfo *MJTI) { const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); if (JT.empty()) return; @@ -183,24 +172,21 @@ void MachOCodeEmitter::emitJumpTables(MachineJumpTableInfo *MJTI) { MachOSection *Sec = MOW.getJumpTableSection(); unsigned TextSecIndex = MOW.getTextSection()->Index; - OutputBuffer SecDataOut(Sec->SectionData, is64Bit, isLittleEndian); + OutputBuffer SecDataOut(Sec->getData(), is64Bit, isLittleEndian); for (unsigned i = 0, e = JT.size(); i != e; ++i) { // For each jump table, record its offset from the start of the section, // reserve space for the relocations to the MBBs, and add the relocations. const std::vector<MachineBasicBlock*> &MBBs = JT[i].MBBs; - JTLocations.push_back(Sec->SectionData.size()); + JTLocations.push_back(Sec->size()); for (unsigned mi = 0, me = MBBs.size(); mi != me; ++mi) { - MachineRelocation MR(MOW.GetJTRelocation(Sec->SectionData.size(), - MBBs[mi])); + MachineRelocation MR(MOW.GetJTRelocation(Sec->size(), MBBs[mi])); MR.setResultPointer((void *)JTLocations[i]); MR.setConstantVal(TextSecIndex); - Sec->Relocations.push_back(MR); + Sec->addRelocation(MR); SecDataOut.outaddr(0); } } - // FIXME: remove when we have unified size + output buffer - Sec->size = Sec->SectionData.size(); } } // end namespace llvm diff --git a/lib/CodeGen/MachOCodeEmitter.h b/lib/CodeGen/MachOCodeEmitter.h index 0a6e4e4..4752446 100644 --- a/lib/CodeGen/MachOCodeEmitter.h +++ b/lib/CodeGen/MachOCodeEmitter.h @@ -10,16 +10,17 @@ #ifndef MACHOCODEEMITTER_H #define MACHOCODEEMITTER_H -#include "MachOWriter.h" -#include "llvm/CodeGen/MachineCodeEmitter.h" -#include <vector> +#include "llvm/CodeGen/ObjectCodeEmitter.h" +#include <map> namespace llvm { +class MachOWriter; + /// MachOCodeEmitter - This class is used by the MachOWriter to emit the code /// for functions to the Mach-O file. 
-class MachOCodeEmitter : public MachineCodeEmitter { +class MachOCodeEmitter : public ObjectCodeEmitter { MachOWriter &MOW; /// Target machine description. @@ -29,36 +30,16 @@ class MachOCodeEmitter : public MachineCodeEmitter { /// machine directly, indicating what header values and flags to set. bool is64Bit, isLittleEndian; - const TargetAsmInfo *TAI; + const MCAsmInfo *MAI; /// Relocations - These are the relocations that the function needs, as /// emitted. std::vector<MachineRelocation> Relocations; - - /// CPLocations - This is a map of constant pool indices to offsets from the - /// start of the section for that constant pool index. - std::vector<uintptr_t> CPLocations; - - /// CPSections - This is a map of constant pool indices to the MachOSection - /// containing the constant pool entry for that index. - std::vector<unsigned> CPSections; - - /// JTLocations - This is a map of jump table indices to offsets from the - /// start of the section for that jump table index. - std::vector<uintptr_t> JTLocations; - - /// MBBLocations - This vector is a mapping from MBB ID's to their address. - /// It is filled in by the StartMachineBasicBlock callback and queried by - /// the getMachineBasicBlockAddress callback. 
- std::vector<uintptr_t> MBBLocations; - + + std::map<uint64_t, uintptr_t> Labels; + public: - MachOCodeEmitter(MachOWriter &mow) : MOW(mow), TM(MOW.TM) - { - is64Bit = TM.getTargetData()->getPointerSizeInBits() == 64; - isLittleEndian = TM.getTargetData()->isLittleEndian(); - TAI = TM.getTargetAsmInfo(); - } + MachOCodeEmitter(MachOWriter &mow, MachOSection &mos); virtual void startFunction(MachineFunction &MF); virtual bool finishFunction(MachineFunction &MF); @@ -66,61 +47,20 @@ public: virtual void addRelocation(const MachineRelocation &MR) { Relocations.push_back(MR); } - + void emitConstantPool(MachineConstantPool *MCP); void emitJumpTables(MachineJumpTableInfo *MJTI); - - virtual uintptr_t getConstantPoolEntryAddress(unsigned Index) const { - assert(CPLocations.size() > Index && "CP not emitted!"); - return CPLocations[Index]; - } - virtual uintptr_t getJumpTableEntryAddress(unsigned Index) const { - assert(JTLocations.size() > Index && "JT not emitted!"); - return JTLocations[Index]; - } - - virtual void StartMachineBasicBlock(MachineBasicBlock *MBB) { - if (MBBLocations.size() <= (unsigned)MBB->getNumber()) - MBBLocations.resize((MBB->getNumber()+1)*2); - MBBLocations[MBB->getNumber()] = getCurrentPCOffset(); - } - virtual uintptr_t getMachineBasicBlockAddress(MachineBasicBlock *MBB) const { - assert(MBBLocations.size() > (unsigned)MBB->getNumber() && - MBBLocations[MBB->getNumber()] && "MBB not emitted!"); - return MBBLocations[MBB->getNumber()]; + virtual void emitLabel(uint64_t LabelID) { + Labels[LabelID] = getCurrentPCOffset(); } virtual uintptr_t getLabelAddress(uint64_t Label) const { - assert(0 && "get Label not implemented"); - abort(); - return 0; - } - - virtual void emitLabel(uint64_t LabelID) { - assert(0 && "emit Label not implemented"); - abort(); + return Labels.find(Label)->second; } virtual void setModuleInfo(llvm::MachineModuleInfo* MMI) { } - /// JIT SPECIFIC FUNCTIONS - DO NOT IMPLEMENT THESE HERE! 
- virtual void startGVStub(const GlobalValue* F, unsigned StubSize, - unsigned Alignment = 1) { - assert(0 && "JIT specific function called!"); - abort(); - } - virtual void startGVStub(const GlobalValue* F, void *Buffer, - unsigned StubSize) { - assert(0 && "JIT specific function called!"); - abort(); - } - virtual void *finishGVStub(const GlobalValue* F) { - assert(0 && "JIT specific function called!"); - abort(); - return 0; - } - }; // end class MachOCodeEmitter } // end namespace llvm diff --git a/lib/CodeGen/MachOWriter.cpp b/lib/CodeGen/MachOWriter.cpp index 163df69..73b15ed 100644 --- a/lib/CodeGen/MachOWriter.cpp +++ b/lib/CodeGen/MachOWriter.cpp @@ -22,36 +22,32 @@ // //===----------------------------------------------------------------------===// +#include "MachO.h" #include "MachOWriter.h" #include "MachOCodeEmitter.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Module.h" #include "llvm/PassManager.h" -#include "llvm/CodeGen/FileWriters.h" -#include "llvm/CodeGen/MachineCodeEmitter.h" -#include "llvm/CodeGen/MachineConstantPool.h" -#include "llvm/CodeGen/MachineJumpTableInfo.h" -#include "llvm/Target/TargetAsmInfo.h" -#include "llvm/Target/TargetJITInfo.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetMachOWriterInfo.h" #include "llvm/Support/Mangler.h" -#include "llvm/Support/MathExtras.h" #include "llvm/Support/OutputBuffer.h" -#include "llvm/Support/Streams.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include <algorithm> -#include <cstring> namespace llvm { /// AddMachOWriter - Concrete function to add the Mach-O writer to the function /// pass manager. 
-MachineCodeEmitter *AddMachOWriter(PassManagerBase &PM, +ObjectCodeEmitter *AddMachOWriter(PassManagerBase &PM, raw_ostream &O, TargetMachine &TM) { MachOWriter *MOW = new MachOWriter(O, TM); PM.add(MOW); - return &MOW->getMachineCodeEmitter(); + return MOW->getObjectCodeEmitter(); } //===----------------------------------------------------------------------===// @@ -65,15 +61,14 @@ MachOWriter::MachOWriter(raw_ostream &o, TargetMachine &tm) is64Bit = TM.getTargetData()->getPointerSizeInBits() == 64; isLittleEndian = TM.getTargetData()->isLittleEndian(); - TAI = TM.getTargetAsmInfo(); + MAI = TM.getMCAsmInfo(); // Create the machine code emitter object for this target. - - MCE = new MachOCodeEmitter(*this); + MachOCE = new MachOCodeEmitter(*this, *getTextSection(true)); } MachOWriter::~MachOWriter() { - delete MCE; + delete MachOCE; } bool MachOWriter::doInitialization(Module &M) { @@ -97,9 +92,9 @@ bool MachOWriter::runOnMachineFunction(MachineFunction &MF) { /// the Mach-O file to 'O'. bool MachOWriter::doFinalization(Module &M) { // FIXME: we don't handle debug info yet, we should probably do that. - // Okay, the.text section has been completed, build the .data, .bss, and // "common" sections next. 
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end(); I != E; ++I) EmitGlobal(I); @@ -125,6 +120,89 @@ bool MachOWriter::doFinalization(Module &M) { return false; } +// getConstSection - Get constant section for Constant 'C' +MachOSection *MachOWriter::getConstSection(Constant *C) { + const ConstantArray *CVA = dyn_cast<ConstantArray>(C); + if (CVA && CVA->isCString()) + return getSection("__TEXT", "__cstring", + MachOSection::S_CSTRING_LITERALS); + + const Type *Ty = C->getType(); + if (Ty->isPrimitiveType() || Ty->isInteger()) { + unsigned Size = TM.getTargetData()->getTypeAllocSize(Ty); + switch(Size) { + default: break; // Fall through to __TEXT,__const + case 4: + return getSection("__TEXT", "__literal4", + MachOSection::S_4BYTE_LITERALS); + case 8: + return getSection("__TEXT", "__literal8", + MachOSection::S_8BYTE_LITERALS); + case 16: + return getSection("__TEXT", "__literal16", + MachOSection::S_16BYTE_LITERALS); + } + } + return getSection("__TEXT", "__const"); +} + +// getJumpTableSection - Select the Jump Table section +MachOSection *MachOWriter::getJumpTableSection() { + if (TM.getRelocationModel() == Reloc::PIC_) + return getTextSection(false); + else + return getSection("__TEXT", "__const"); +} + +// getSection - Return the section with the specified name, creating a new +// section if one does not already exist. 
+MachOSection *MachOWriter::getSection(const std::string &seg, + const std::string §, + unsigned Flags /* = 0 */ ) { + MachOSection *MOS = SectionLookup[seg+sect]; + if (MOS) return MOS; + + MOS = new MachOSection(seg, sect); + SectionList.push_back(MOS); + MOS->Index = SectionList.size(); + MOS->flags = MachOSection::S_REGULAR | Flags; + SectionLookup[seg+sect] = MOS; + return MOS; +} + +// getTextSection - Return text section with different flags for code/data +MachOSection *MachOWriter::getTextSection(bool isCode /* = true */ ) { + if (isCode) + return getSection("__TEXT", "__text", + MachOSection::S_ATTR_PURE_INSTRUCTIONS | + MachOSection::S_ATTR_SOME_INSTRUCTIONS); + else + return getSection("__TEXT", "__text"); +} + +MachOSection *MachOWriter::getBSSSection() { + return getSection("__DATA", "__bss", MachOSection::S_ZEROFILL); +} + +// GetJTRelocation - Get a relocation a new BB relocation based +// on target information. +MachineRelocation MachOWriter::GetJTRelocation(unsigned Offset, + MachineBasicBlock *MBB) const { + return TM.getMachOWriterInfo()->GetJTRelocation(Offset, MBB); +} + +// GetTargetRelocation - Returns the number of relocations. +unsigned MachOWriter::GetTargetRelocation(MachineRelocation &MR, + unsigned FromIdx, unsigned ToAddr, + unsigned ToIndex, OutputBuffer &RelocOut, + OutputBuffer &SecOut, bool Scattered, + bool Extern) { + return TM.getMachOWriterInfo()->GetTargetRelocation(MR, FromIdx, ToAddr, + ToIndex, RelocOut, + SecOut, Scattered, + Extern); +} + void MachOWriter::AddSymbolToSection(MachOSection *Sec, GlobalVariable *GV) { const Type *Ty = GV->getType()->getElementType(); unsigned Size = TM.getTargetData()->getTypeAllocSize(Ty); @@ -133,37 +211,31 @@ void MachOWriter::AddSymbolToSection(MachOSection *Sec, GlobalVariable *GV) { // Reserve space in the .bss section for this symbol while maintaining the // desired section alignment, which must be at least as much as required by // this symbol. 
- OutputBuffer SecDataOut(Sec->SectionData, is64Bit, isLittleEndian); + OutputBuffer SecDataOut(Sec->getData(), is64Bit, isLittleEndian); if (Align) { - uint64_t OrigSize = Sec->size; Align = Log2_32(Align); Sec->align = std::max(unsigned(Sec->align), Align); - Sec->size = (Sec->size + Align - 1) & ~(Align-1); - // Add alignment padding to buffer as well. - // FIXME: remove when we have unified size + output buffer - unsigned AlignedSize = Sec->size - OrigSize; - for (unsigned i = 0; i < AlignedSize; ++i) - SecDataOut.outbyte(0); + Sec->emitAlignment(Sec->align); } // Globals without external linkage apparently do not go in the symbol table. if (!GV->hasLocalLinkage()) { - MachOSym Sym(GV, Mang->getValueName(GV), Sec->Index, TAI); - Sym.n_value = Sec->size; + MachOSym Sym(GV, Mang->getMangledName(GV), Sec->Index, MAI); + Sym.n_value = Sec->size(); SymbolTable.push_back(Sym); } // Record the offset of the symbol, and then allocate space for it. // FIXME: remove when we have unified size + output buffer - Sec->size += Size; // Now that we know what section the GlovalVariable is going to be emitted // into, update our mappings. // FIXME: We may also need to update this when outputting non-GlobalVariable // GlobalValues such as functions. + GVSection[GV] = Sec; - GVOffset[GV] = Sec->SectionData.size(); + GVOffset[GV] = Sec->size(); // Allocate space in the section for the global. for (unsigned i = 0; i < Size; ++i) @@ -183,8 +255,8 @@ void MachOWriter::EmitGlobal(GlobalVariable *GV) { // merged with other symbols. if (NoInit || GV->hasLinkOnceLinkage() || GV->hasWeakLinkage() || GV->hasCommonLinkage()) { - MachOSym ExtOrCommonSym(GV, Mang->getValueName(GV), - MachOSym::NO_SECT, TAI); + MachOSym ExtOrCommonSym(GV, Mang->getMangledName(GV), + MachOSym::NO_SECT, MAI); // For undefined (N_UNDF) external (N_EXT) types, n_value is the size in // bytes of the symbol. 
ExtOrCommonSym.n_value = Size; @@ -205,8 +277,7 @@ void MachOWriter::EmitGlobal(GlobalVariable *GV) { MachOSection *Sec = GV->isConstant() ? getConstSection(GV->getInitializer()) : getDataSection(); AddSymbolToSection(Sec, GV); - InitMem(GV->getInitializer(), &Sec->SectionData[0], GVOffset[GV], - TM.getTargetData(), Sec->Relocations); + InitMem(GV->getInitializer(), GVOffset[GV], TM.getTargetData(), Sec); } @@ -214,6 +285,7 @@ void MachOWriter::EmitGlobal(GlobalVariable *GV) { void MachOWriter::EmitHeaderAndLoadCommands() { // Step #0: Fill in the segment load command size, since we need it to figure // out the rest of the header fields + MachOSegment SEG("", is64Bit); SEG.nsects = SectionList.size(); SEG.cmdsize = SEG.cmdSize(is64Bit) + @@ -231,7 +303,7 @@ void MachOWriter::EmitHeaderAndLoadCommands() { // Step #3: write the header to the file // Local alias to shortenify coming code. - DataBuffer &FH = Header.HeaderData; + std::vector<unsigned char> &FH = Header.HeaderData; OutputBuffer FHOut(FH, is64Bit, isLittleEndian); FHOut.outword(Header.magic); @@ -247,7 +319,7 @@ void MachOWriter::EmitHeaderAndLoadCommands() { // Step #4: Finish filling in the segment load command and write it out for (std::vector<MachOSection*>::iterator I = SectionList.begin(), E = SectionList.end(); I != E; ++I) - SEG.filesize += (*I)->size; + SEG.filesize += (*I)->size(); SEG.vmsize = SEG.filesize; SEG.fileoff = Header.cmdSize(is64Bit) + Header.sizeofcmds; @@ -271,9 +343,8 @@ void MachOWriter::EmitHeaderAndLoadCommands() { MachOSection *MOS = *I; MOS->addr = currentAddr; MOS->offset = currentAddr + SEG.fileoff; - // FIXME: do we need to do something with alignment here? 
- currentAddr += MOS->size; + currentAddr += MOS->size(); } // Step #6: Emit the symbol table to temporary buffers, so that we know the @@ -288,6 +359,7 @@ void MachOWriter::EmitHeaderAndLoadCommands() { for (std::vector<MachOSection*>::iterator I = SectionList.begin(), E = SectionList.end(); I != E; ++I) { MachOSection *MOS = *I; + // Convert the relocations to target-specific relocations, and fill in the // relocation offset for this section. CalculateRelocations(*MOS); @@ -298,7 +370,7 @@ void MachOWriter::EmitHeaderAndLoadCommands() { FHOut.outstring(MOS->sectname, 16); FHOut.outstring(MOS->segname, 16); FHOut.outaddr(MOS->addr); - FHOut.outaddr(MOS->size); + FHOut.outaddr(MOS->size()); FHOut.outword(MOS->offset); FHOut.outword(MOS->align); FHOut.outword(MOS->reloff); @@ -351,24 +423,26 @@ void MachOWriter::EmitHeaderAndLoadCommands() { /// EmitSections - Now that we have constructed the file header and load /// commands, emit the data for each section to the file. - void MachOWriter::EmitSections() { for (std::vector<MachOSection*>::iterator I = SectionList.begin(), E = SectionList.end(); I != E; ++I) // Emit the contents of each section - O.write((char*)&(*I)->SectionData[0], (*I)->size); + if ((*I)->size()) + O.write((char*)&(*I)->getData()[0], (*I)->size()); } + +/// EmitRelocations - emit relocation data from buffer. void MachOWriter::EmitRelocations() { for (std::vector<MachOSection*>::iterator I = SectionList.begin(), E = SectionList.end(); I != E; ++I) // Emit the relocation entry data for each section. - O.write((char*)&(*I)->RelocBuffer[0], (*I)->RelocBuffer.size()); + if ((*I)->RelocBuffer.size()) + O.write((char*)&(*I)->RelocBuffer[0], (*I)->RelocBuffer.size()); } /// BufferSymbolAndStringTable - Sort the symbols we encountered and assign them /// each a string table index so that they appear in the correct order in the /// output file. - void MachOWriter::BufferSymbolAndStringTable() { // The order of the symbol table is: // 1. 
local symbols @@ -377,11 +451,10 @@ void MachOWriter::BufferSymbolAndStringTable() { // Before sorting the symbols, check the PendingGlobals for any undefined // globals that need to be put in the symbol table. - for (std::vector<GlobalValue*>::iterator I = PendingGlobals.begin(), E = PendingGlobals.end(); I != E; ++I) { if (GVOffset[*I] == 0 && GVSection[*I] == 0) { - MachOSym UndfSym(*I, Mang->getValueName(*I), MachOSym::NO_SECT, TAI); + MachOSym UndfSym(*I, Mang->getMangledName(*I), MachOSym::NO_SECT, MAI); SymbolTable.push_back(UndfSym); GVOffset[*I] = -1; } @@ -389,19 +462,16 @@ void MachOWriter::BufferSymbolAndStringTable() { // Sort the symbols by name, so that when we partition the symbols by scope // of definition, we won't have to sort by name within each partition. - std::sort(SymbolTable.begin(), SymbolTable.end(), MachOSym::SymCmp()); // Parition the symbol table entries so that all local symbols come before // all symbols with external linkage. { 1 | 2 3 } - std::partition(SymbolTable.begin(), SymbolTable.end(), MachOSym::PartitionByLocal); // Advance iterator to beginning of external symbols and partition so that // all external symbols defined in this module come before all external // symbols defined elsewhere. { 1 | 2 | 3 } - for (std::vector<MachOSym>::iterator I = SymbolTable.begin(), E = SymbolTable.end(); I != E; ++I) { if (!MachOSym::PartitionByLocal(*I)) { @@ -413,7 +483,6 @@ void MachOWriter::BufferSymbolAndStringTable() { // Calculate the starting index for each of the local, extern defined, and // undefined symbols, as well as the number of each to put in the LC_DYSYMTAB // load command. - for (std::vector<MachOSym>::iterator I = SymbolTable.begin(), E = SymbolTable.end(); I != E; ++I) { if (MachOSym::PartitionByLocal(*I)) { @@ -430,7 +499,6 @@ void MachOWriter::BufferSymbolAndStringTable() { // Write out a leading zero byte when emitting string table, for n_strx == 0 // which means an empty string. 
- OutputBuffer StrTOut(StrT, is64Bit, isLittleEndian); StrTOut.outbyte(0); @@ -439,7 +507,6 @@ void MachOWriter::BufferSymbolAndStringTable() { // 2. strings for local symbols // Since this is the opposite order from the symbol table, which we have just // sorted, we can walk the symbol table backwards to output the string table. - for (std::vector<MachOSym>::reverse_iterator I = SymbolTable.rbegin(), E = SymbolTable.rend(); I != E; ++I) { if (I->GVName == "") { @@ -478,24 +545,22 @@ void MachOWriter::BufferSymbolAndStringTable() { /// and the offset into that section. From this information, create the /// appropriate target-specific MachORelocation type and add buffer it to be /// written out after we are finished writing out sections. - void MachOWriter::CalculateRelocations(MachOSection &MOS) { - for (unsigned i = 0, e = MOS.Relocations.size(); i != e; ++i) { - MachineRelocation &MR = MOS.Relocations[i]; + std::vector<MachineRelocation> Relocations = MOS.getRelocations(); + for (unsigned i = 0, e = Relocations.size(); i != e; ++i) { + MachineRelocation &MR = Relocations[i]; unsigned TargetSection = MR.getConstantVal(); unsigned TargetAddr = 0; unsigned TargetIndex = 0; // This is a scattered relocation entry if it points to a global value with // a non-zero offset. - bool Scattered = false; bool Extern = false; // Since we may not have seen the GlobalValue we were interested in yet at // the time we emitted the relocation for it, fix it up now so that it // points to the offset into the correct section. - if (MR.isGlobalValue()) { GlobalValue *GV = MR.getGlobalValue(); MachOSection *MOSPtr = GVSection[GV]; @@ -503,7 +568,6 @@ void MachOWriter::CalculateRelocations(MachOSection &MOS) { // If we have never seen the global before, it must be to a symbol // defined in another module (N_UNDF). 
- if (!MOSPtr) { // FIXME: need to append stub suffix Extern = true; @@ -518,7 +582,6 @@ void MachOWriter::CalculateRelocations(MachOSection &MOS) { // If the symbol is locally defined, pass in the address of the section and // the section index to the code which will generate the target relocation. - if (!Extern) { MachOSection &To = *SectionList[TargetSection - 1]; TargetAddr = To.addr; @@ -526,7 +589,7 @@ void MachOWriter::CalculateRelocations(MachOSection &MOS) { } OutputBuffer RelocOut(MOS.RelocBuffer, is64Bit, isLittleEndian); - OutputBuffer SecOut(MOS.SectionData, is64Bit, isLittleEndian); + OutputBuffer SecOut(MOS.getData(), is64Bit, isLittleEndian); MOS.nreloc += GetTargetRelocation(MR, MOS.Index, TargetAddr, TargetIndex, RelocOut, SecOut, Scattered, Extern); @@ -535,12 +598,11 @@ void MachOWriter::CalculateRelocations(MachOSection &MOS) { // InitMem - Write the value of a Constant to the specified memory location, // converting it into bytes and relocations. - -void MachOWriter::InitMem(const Constant *C, void *Addr, intptr_t Offset, - const TargetData *TD, - std::vector<MachineRelocation> &MRs) { +void MachOWriter::InitMem(const Constant *C, uintptr_t Offset, + const TargetData *TD, MachOSection* mos) { typedef std::pair<const Constant*, intptr_t> CPair; std::vector<CPair> WorkList; + uint8_t *Addr = &mos->getData()[0]; WorkList.push_back(CPair(C,(intptr_t)Addr + Offset)); @@ -572,9 +634,8 @@ void MachOWriter::InitMem(const Constant *C, void *Addr, intptr_t Offset, } case Instruction::Add: default: - cerr << "ConstantExpr not handled as global var init: " << *CE << "\n"; - abort(); - break; + errs() << "ConstantExpr not handled as global var init: " << *CE <<"\n"; + llvm_unreachable(0); } } else if (PC->getType()->isSingleValueType()) { unsigned char *ptr = (unsigned char *)PA; @@ -608,7 +669,7 @@ void MachOWriter::InitMem(const Constant *C, void *Addr, intptr_t Offset, ptr[6] = val >> 48; ptr[7] = val >> 56; } else { - assert(0 && "Not implemented: bit 
widths > 64"); + llvm_unreachable("Not implemented: bit widths > 64"); } break; } @@ -643,17 +704,19 @@ void MachOWriter::InitMem(const Constant *C, void *Addr, intptr_t Offset, memset(ptr, 0, TD->getPointerSize()); else if (const GlobalValue* GV = dyn_cast<GlobalValue>(PC)) { // FIXME: what about function stubs? - MRs.push_back(MachineRelocation::getGV(PA-(intptr_t)Addr, + mos->addRelocation(MachineRelocation::getGV(PA-(intptr_t)Addr, MachineRelocation::VANILLA, const_cast<GlobalValue*>(GV), ScatteredOffset)); ScatteredOffset = 0; } else - assert(0 && "Unknown constant pointer type!"); + llvm_unreachable("Unknown constant pointer type!"); break; default: - cerr << "ERROR: Constant unimp for type: " << *PC->getType() << "\n"; - abort(); + std::string msg; + raw_string_ostream Msg(msg); + Msg << "ERROR: Constant unimp for type: " << *PC->getType(); + llvm_report_error(Msg.str()); } } else if (isa<ConstantAggregateZero>(PC)) { memset((void*)PA, 0, (size_t)TD->getTypeAllocSize(PC->getType())); @@ -669,8 +732,8 @@ void MachOWriter::InitMem(const Constant *C, void *Addr, intptr_t Offset, WorkList.push_back(CPair(CPS->getOperand(i), PA+SL->getElementOffset(i))); } else { - cerr << "Bad Type: " << *PC->getType() << "\n"; - assert(0 && "Unknown constant type to initialize memory with!"); + errs() << "Bad Type: " << *PC->getType() << "\n"; + llvm_unreachable("Unknown constant type to initialize memory with!"); } } } @@ -680,13 +743,14 @@ void MachOWriter::InitMem(const Constant *C, void *Addr, intptr_t Offset, //===----------------------------------------------------------------------===// MachOSym::MachOSym(const GlobalValue *gv, std::string name, uint8_t sect, - const TargetAsmInfo *TAI) : + const MCAsmInfo *MAI) : GV(gv), n_strx(0), n_type(sect == NO_SECT ? N_UNDF : N_SECT), n_sect(sect), n_desc(0), n_value(0) { + // FIXME: This is completely broken, it should use the mangler interface. 
switch (GV->getLinkage()) { default: - assert(0 && "Unexpected linkage type!"); + llvm_unreachable("Unexpected linkage type!"); break; case GlobalValue::WeakAnyLinkage: case GlobalValue::WeakODRLinkage: @@ -695,17 +759,19 @@ MachOSym::MachOSym(const GlobalValue *gv, std::string name, uint8_t sect, case GlobalValue::CommonLinkage: assert(!isa<Function>(gv) && "Unexpected linkage type for Function!"); case GlobalValue::ExternalLinkage: - GVName = TAI->getGlobalPrefix() + name; + GVName = MAI->getGlobalPrefix() + name; n_type |= GV->hasHiddenVisibility() ? N_PEXT : N_EXT; break; case GlobalValue::PrivateLinkage: - GVName = TAI->getPrivateGlobalPrefix() + name; + GVName = MAI->getPrivateGlobalPrefix() + name; + break; + case GlobalValue::LinkerPrivateLinkage: + GVName = MAI->getLinkerPrivateGlobalPrefix() + name; break; case GlobalValue::InternalLinkage: - GVName = TAI->getGlobalPrefix() + name; + GVName = MAI->getGlobalPrefix() + name; break; } } } // end namespace llvm - diff --git a/lib/CodeGen/MachOWriter.h b/lib/CodeGen/MachOWriter.h index 3af2b0a..9273f38 100644 --- a/lib/CodeGen/MachOWriter.h +++ b/lib/CodeGen/MachOWriter.h @@ -14,22 +14,28 @@ #ifndef MACHOWRITER_H #define MACHOWRITER_H -#include "MachO.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetMachOWriterInfo.h" +#include <vector> #include <map> namespace llvm { + class Constant; class GlobalVariable; class Mangler; - class MachineCodeEmitter; + class MachineBasicBlock; + class MachineRelocation; class MachOCodeEmitter; + struct MachODySymTab; + struct MachOHeader; + struct MachOSection; + struct MachOSym; + class TargetData; + class TargetMachine; + class MCAsmInfo; + class ObjectCodeEmitter; class OutputBuffer; class raw_ostream; - /// MachOWriter - This class implements the common target-independent code for /// writing Mach-O files. 
Targets should derive a class from this to /// parameterize the output format. @@ -38,8 +44,9 @@ namespace llvm { friend class MachOCodeEmitter; public: static char ID; - MachineCodeEmitter &getMachineCodeEmitter() const { - return *(MachineCodeEmitter*)MCE; + + ObjectCodeEmitter *getObjectCodeEmitter() { + return reinterpret_cast<ObjectCodeEmitter*>(MachOCE); } MachOWriter(raw_ostream &O, TargetMachine &TM); @@ -61,36 +68,30 @@ namespace llvm { /// Mang - The object used to perform name mangling for this module. /// Mangler *Mang; - - /// MCE - The MachineCodeEmitter object that we are exposing to emit machine - /// code for functions to the .o file. - MachOCodeEmitter *MCE; + /// MachOCE - The MachineCodeEmitter object that we are exposing to emit + /// machine code for functions to the .o file. + MachOCodeEmitter *MachOCE; /// is64Bit/isLittleEndian - This information is inferred from the target /// machine directly, indicating what header values and flags to set. - bool is64Bit, isLittleEndian; // Target Asm Info - - const TargetAsmInfo *TAI; + const MCAsmInfo *MAI; /// Header - An instance of MachOHeader that we will update while we build /// the file, and then emit during finalization. - MachOHeader Header; /// doInitialization - Emit the file header and all of the global variables /// for the module to the Mach-O file. - bool doInitialization(Module &M); bool runOnMachineFunction(MachineFunction &MF); /// doFinalization - Now that the module has been completely processed, emit /// the Mach-O file to 'O'. - bool doFinalization(Module &M); private: @@ -98,85 +99,37 @@ namespace llvm { /// SectionList - This is the list of sections that we have emitted to the /// file. Once the file has been completely built, the segment load command /// SectionCommands are constructed from this info. 
- std::vector<MachOSection*> SectionList; /// SectionLookup - This is a mapping from section name to SectionList entry - std::map<std::string, MachOSection*> SectionLookup; - + /// GVSection - This is a mapping from a GlobalValue to a MachOSection, /// to aid in emitting relocations. - std::map<GlobalValue*, MachOSection*> GVSection; - /// GVOffset - This is a mapping from a GlobalValue to an offset from the + /// GVOffset - This is a mapping from a GlobalValue to an offset from the /// start of the section in which the GV resides, to aid in emitting /// relocations. - std::map<GlobalValue*, intptr_t> GVOffset; /// getSection - Return the section with the specified name, creating a new /// section if one does not already exist. - MachOSection *getSection(const std::string &seg, const std::string §, - unsigned Flags = 0) { - MachOSection *MOS = SectionLookup[seg+sect]; - if (MOS) return MOS; - - MOS = new MachOSection(seg, sect); - SectionList.push_back(MOS); - MOS->Index = SectionList.size(); - MOS->flags = MachOSection::S_REGULAR | Flags; - SectionLookup[seg+sect] = MOS; - return MOS; - } - MachOSection *getTextSection(bool isCode = true) { - if (isCode) - return getSection("__TEXT", "__text", - MachOSection::S_ATTR_PURE_INSTRUCTIONS | - MachOSection::S_ATTR_SOME_INSTRUCTIONS); - else - return getSection("__TEXT", "__text"); - } - MachOSection *getBSSSection() { - return getSection("__DATA", "__bss", MachOSection::S_ZEROFILL); - } + unsigned Flags = 0); + + /// getTextSection - Return text section with different flags for code/data + MachOSection *getTextSection(bool isCode = true); + MachOSection *getDataSection() { return getSection("__DATA", "__data"); } - MachOSection *getConstSection(Constant *C) { - const ConstantArray *CVA = dyn_cast<ConstantArray>(C); - if (CVA && CVA->isCString()) - return getSection("__TEXT", "__cstring", - MachOSection::S_CSTRING_LITERALS); - - const Type *Ty = C->getType(); - if (Ty->isPrimitiveType() || Ty->isInteger()) { - unsigned 
Size = TM.getTargetData()->getTypeAllocSize(Ty); - switch(Size) { - default: break; // Fall through to __TEXT,__const - case 4: - return getSection("__TEXT", "__literal4", - MachOSection::S_4BYTE_LITERALS); - case 8: - return getSection("__TEXT", "__literal8", - MachOSection::S_8BYTE_LITERALS); - case 16: - return getSection("__TEXT", "__literal16", - MachOSection::S_16BYTE_LITERALS); - } - } - return getSection("__TEXT", "__const"); - } - MachOSection *getJumpTableSection() { - if (TM.getRelocationModel() == Reloc::PIC_) - return getTextSection(false); - else - return getSection("__TEXT", "__const"); - } - - /// MachOSymTab - This struct contains information about the offsets and + + MachOSection *getBSSSection(); + MachOSection *getConstSection(Constant *C); + MachOSection *getJumpTableSection(); + + /// MachOSymTab - This struct contains information about the offsets and /// size of symbol table information. /// segment. struct MachOSymTab { @@ -191,43 +144,42 @@ namespace llvm { // see <mach-o/loader.h> enum { LC_SYMTAB = 0x02 // link-edit stab symbol table info }; - + MachOSymTab() : cmd(LC_SYMTAB), cmdsize(6 * sizeof(uint32_t)), symoff(0), nsyms(0), stroff(0), strsize(0) { } }; - + /// SymTab - The "stab" style symbol table information - MachOSymTab SymTab; + MachOSymTab SymTab; /// DySymTab - symbol table info for the dynamic link editor MachODySymTab DySymTab; protected: - + /// SymbolTable - This is the list of symbols we have emitted to the file. /// This actually gets rearranged before emission to the file (to put the /// local symbols first in the list). std::vector<MachOSym> SymbolTable; - + /// SymT - A buffer to hold the symbol table before we write it out at the /// appropriate location in the file. - DataBuffer SymT; - + std::vector<unsigned char> SymT; + /// StrT - A buffer to hold the string table before we write it out at the /// appropriate location in the file. 
- DataBuffer StrT; - + std::vector<unsigned char> StrT; + /// PendingSyms - This is a list of externally defined symbols that we have /// been asked to emit, but have not seen a reference to. When a reference /// is seen, the symbol will move from this list to the SymbolTable. std::vector<GlobalValue*> PendingGlobals; - + /// DynamicSymbolTable - This is just a vector of indices into /// SymbolTable to aid in emitting the DYSYMTAB load command. std::vector<unsigned> DynamicSymbolTable; - - static void InitMem(const Constant *C, void *Addr, intptr_t Offset, - const TargetData *TD, - std::vector<MachineRelocation> &MRs); + + static void InitMem(const Constant *C, uintptr_t Offset, + const TargetData *TD, MachOSection* mos); private: void AddSymbolToSection(MachOSection *MOS, GlobalVariable *GV); @@ -238,25 +190,16 @@ namespace llvm { void BufferSymbolAndStringTable(); void CalculateRelocations(MachOSection &MOS); + // GetJTRelocation - Get a relocation a new BB relocation based + // on target information. MachineRelocation GetJTRelocation(unsigned Offset, - MachineBasicBlock *MBB) const { - return TM.getMachOWriterInfo()->GetJTRelocation(Offset, MBB); - } + MachineBasicBlock *MBB) const; /// GetTargetRelocation - Returns the number of relocations. 
- unsigned GetTargetRelocation(MachineRelocation &MR, - unsigned FromIdx, - unsigned ToAddr, - unsigned ToIndex, - OutputBuffer &RelocOut, - OutputBuffer &SecOut, - bool Scattered, - bool Extern) { - return TM.getMachOWriterInfo()->GetTargetRelocation(MR, FromIdx, ToAddr, - ToIndex, RelocOut, - SecOut, Scattered, - Extern); - } + unsigned GetTargetRelocation(MachineRelocation &MR, unsigned FromIdx, + unsigned ToAddr, unsigned ToIndex, + OutputBuffer &RelocOut, OutputBuffer &SecOut, + bool Scattered, bool Extern); }; } diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp index 71e6b3e..b3eb2da 100644 --- a/lib/CodeGen/MachineBasicBlock.cpp +++ b/lib/CodeGen/MachineBasicBlock.cpp @@ -19,6 +19,7 @@ #include "llvm/Target/TargetInstrDesc.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Support/LeakDetector.h" +#include "llvm/Support/raw_ostream.h" #include <algorithm> using namespace llvm; @@ -31,7 +32,7 @@ MachineBasicBlock::~MachineBasicBlock() { LeakDetector::removeGarbageObject(this); } -std::ostream& llvm::operator<<(std::ostream &OS, const MachineBasicBlock &MBB) { +raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineBasicBlock &MBB) { MBB.print(OS); return OS; } @@ -43,7 +44,7 @@ std::ostream& llvm::operator<<(std::ostream &OS, const MachineBasicBlock &MBB) { /// MBBs start out as #-1. When a MBB is added to a MachineFunction, it /// gets the next available unique MBB number. If it is removed from a /// MachineFunction, it goes back to being #-1. 
-void ilist_traits<MachineBasicBlock>::addNodeToList(MachineBasicBlock* N) { +void ilist_traits<MachineBasicBlock>::addNodeToList(MachineBasicBlock *N) { MachineFunction &MF = *N->getParent(); N->Number = MF.addToMBBNumbering(N); @@ -55,7 +56,7 @@ void ilist_traits<MachineBasicBlock>::addNodeToList(MachineBasicBlock* N) { LeakDetector::removeGarbageObject(N); } -void ilist_traits<MachineBasicBlock>::removeNodeFromList(MachineBasicBlock* N) { +void ilist_traits<MachineBasicBlock>::removeNodeFromList(MachineBasicBlock *N) { N->getParent()->removeFromMBBNumbering(N->Number); N->Number = -1; LeakDetector::addGarbageObject(N); @@ -65,7 +66,7 @@ void ilist_traits<MachineBasicBlock>::removeNodeFromList(MachineBasicBlock* N) { /// addNodeToList (MI) - When we add an instruction to a basic block /// list, we update its parent pointer and add its operands from reg use/def /// lists if appropriate. -void ilist_traits<MachineInstr>::addNodeToList(MachineInstr* N) { +void ilist_traits<MachineInstr>::addNodeToList(MachineInstr *N) { assert(N->getParent() == 0 && "machine instruction already in a basic block"); N->setParent(Parent); @@ -80,7 +81,7 @@ void ilist_traits<MachineInstr>::addNodeToList(MachineInstr* N) { /// removeNodeFromList (MI) - When we remove an instruction from a basic block /// list, we update its parent pointer and remove its operands from reg use/def /// lists if appropriate. -void ilist_traits<MachineInstr>::removeNodeFromList(MachineInstr* N) { +void ilist_traits<MachineInstr>::removeNodeFromList(MachineInstr *N) { assert(N->getParent() != 0 && "machine instruction not in a basic block"); // Remove from the use/def lists. @@ -94,10 +95,10 @@ void ilist_traits<MachineInstr>::removeNodeFromList(MachineInstr* N) { /// transferNodesFromList (MI) - When moving a range of instructions from one /// MBB list to another, we need to update the parent pointers and the use/def /// lists. 
-void ilist_traits<MachineInstr>::transferNodesFromList( - ilist_traits<MachineInstr>& fromList, - MachineBasicBlock::iterator first, - MachineBasicBlock::iterator last) { +void ilist_traits<MachineInstr>:: +transferNodesFromList(ilist_traits<MachineInstr> &fromList, + MachineBasicBlock::iterator first, + MachineBasicBlock::iterator last) { assert(Parent->getParent() == fromList.Parent->getParent() && "MachineInstr parent mismatch!"); @@ -123,21 +124,41 @@ MachineBasicBlock::iterator MachineBasicBlock::getFirstTerminator() { return I; } -bool -MachineBasicBlock::isOnlyReachableByFallthrough() const { - return !isLandingPad() && - !pred_empty() && - next(pred_begin()) == pred_end() && - (*pred_begin())->isLayoutSuccessor(this) && - ((*pred_begin())->empty() || - !(*pred_begin())->back().getDesc().isBarrier()); +/// isOnlyReachableViaFallthough - Return true if this basic block has +/// exactly one predecessor and the control transfer mechanism between +/// the predecessor and this block is a fall-through. +bool MachineBasicBlock::isOnlyReachableByFallthrough() const { + // If this is a landing pad, it isn't a fall through. If it has no preds, + // then nothing falls through to it. + if (isLandingPad() || pred_empty()) + return false; + + // If there isn't exactly one predecessor, it can't be a fall through. + const_pred_iterator PI = pred_begin(), PI2 = PI; + ++PI2; + if (PI2 != pred_end()) + return false; + + // The predecessor has to be immediately before this block. + const MachineBasicBlock *Pred = *PI; + + if (!Pred->isLayoutSuccessor(this)) + return false; + + // If the block is completely empty, then it definitely does fall through. + if (Pred->empty()) + return true; + + // Otherwise, check the last instruction. 
+ const MachineInstr &LastInst = Pred->back(); + return !LastInst.getDesc().isBarrier(); } void MachineBasicBlock::dump() const { - print(*cerr.stream()); + print(errs()); } -static inline void OutputReg(std::ostream &os, unsigned RegNo, +static inline void OutputReg(raw_ostream &os, unsigned RegNo, const TargetRegisterInfo *TRI = 0) { if (!RegNo || TargetRegisterInfo::isPhysicalRegister(RegNo)) { if (TRI) @@ -148,16 +169,16 @@ static inline void OutputReg(std::ostream &os, unsigned RegNo, os << " %reg" << RegNo; } -void MachineBasicBlock::print(std::ostream &OS) const { +void MachineBasicBlock::print(raw_ostream &OS) const { const MachineFunction *MF = getParent(); - if(!MF) { + if (!MF) { OS << "Can't print out MachineBasicBlock because parent MachineFunction" << " is null\n"; return; } const BasicBlock *LBB = getBasicBlock(); - OS << "\n"; + OS << '\n'; if (LBB) OS << LBB->getName() << ": "; OS << (const void*)this << ", LLVM BB @" << (const void*) LBB << ", ID#" << getNumber(); @@ -170,18 +191,18 @@ void MachineBasicBlock::print(std::ostream &OS) const { OS << "Live Ins:"; for (const_livein_iterator I = livein_begin(),E = livein_end(); I != E; ++I) OutputReg(OS, *I, TRI); - OS << "\n"; + OS << '\n'; } // Print the preds of this block according to the CFG. 
if (!pred_empty()) { OS << " Predecessors according to CFG:"; for (const_pred_iterator PI = pred_begin(), E = pred_end(); PI != E; ++PI) - OS << " " << *PI << " (#" << (*PI)->getNumber() << ")"; - OS << "\n"; + OS << ' ' << *PI << " (#" << (*PI)->getNumber() << ')'; + OS << '\n'; } for (const_iterator I = begin(); I != end(); ++I) { - OS << "\t"; + OS << '\t'; I->print(OS, &getParent()->getTarget()); } @@ -189,8 +210,8 @@ void MachineBasicBlock::print(std::ostream &OS) const { if (!succ_empty()) { OS << " Successors according to CFG:"; for (const_succ_iterator SI = succ_begin(), E = succ_end(); SI != E; ++SI) - OS << " " << *SI << " (#" << (*SI)->getNumber() << ")"; - OS << "\n"; + OS << ' ' << *SI << " (#" << (*SI)->getNumber() << ')'; + OS << '\n'; } } @@ -245,16 +266,15 @@ void MachineBasicBlock::removePredecessor(MachineBasicBlock *pred) { Predecessors.erase(I); } -void MachineBasicBlock::transferSuccessors(MachineBasicBlock *fromMBB) -{ +void MachineBasicBlock::transferSuccessors(MachineBasicBlock *fromMBB) { if (this == fromMBB) return; - for(MachineBasicBlock::succ_iterator iter = fromMBB->succ_begin(), - end = fromMBB->succ_end(); iter != end; ++iter) { - addSuccessor(*iter); - } - while(!fromMBB->succ_empty()) + for (MachineBasicBlock::succ_iterator I = fromMBB->succ_begin(), + E = fromMBB->succ_end(); I != E; ++I) + addSuccessor(*I); + + while (!fromMBB->succ_empty()) fromMBB->removeSuccessor(fromMBB->succ_begin()); } diff --git a/lib/CodeGen/MachineDominators.cpp b/lib/CodeGen/MachineDominators.cpp index 37c8601..0f796f3 100644 --- a/lib/CodeGen/MachineDominators.cpp +++ b/lib/CodeGen/MachineDominators.cpp @@ -51,3 +51,7 @@ MachineDominatorTree::~MachineDominatorTree() { void MachineDominatorTree::releaseMemory() { DT->releaseMemory(); } + +void MachineDominatorTree::print(raw_ostream &OS, const Module*) const { + DT->print(OS); +} diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index 599efb8..b0ec809 100644 --- 
a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -19,6 +19,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/Config/config.h" #include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstr.h" @@ -32,89 +33,56 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/GraphWriter.h" #include "llvm/Support/raw_ostream.h" -#include <fstream> -#include <sstream> using namespace llvm; -bool MachineFunctionPass::runOnFunction(Function &F) { - // Do not codegen any 'available_externally' functions at all, they have - // definitions outside the translation unit. - if (F.hasAvailableExternallyLinkage()) - return false; - - return runOnMachineFunction(MachineFunction::get(&F)); -} - namespace { struct VISIBILITY_HIDDEN Printer : public MachineFunctionPass { static char ID; - std::ostream *OS; + raw_ostream &OS; const std::string Banner; - Printer (std::ostream *os, const std::string &banner) + Printer(raw_ostream &os, const std::string &banner) : MachineFunctionPass(&ID), OS(os), Banner(banner) {} const char *getPassName() const { return "MachineFunction Printer"; } virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); } bool runOnMachineFunction(MachineFunction &MF) { - (*OS) << Banner; - MF.print (*OS); + OS << Banner; + MF.print(OS); return false; } }; char Printer::ID = 0; } -/// Returns a newly-created MachineFunction Printer pass. The default output -/// stream is std::cerr; the default banner is empty. +/// Returns a newly-created MachineFunction Printer pass. The default banner is +/// empty. 
/// -FunctionPass *llvm::createMachineFunctionPrinterPass(std::ostream *OS, +FunctionPass *llvm::createMachineFunctionPrinterPass(raw_ostream &OS, const std::string &Banner){ return new Printer(OS, Banner); } -namespace { - struct VISIBILITY_HIDDEN Deleter : public MachineFunctionPass { - static char ID; - Deleter() : MachineFunctionPass(&ID) {} - - const char *getPassName() const { return "Machine Code Deleter"; } - - bool runOnMachineFunction(MachineFunction &MF) { - // Delete the annotation from the function now. - MachineFunction::destruct(MF.getFunction()); - return true; - } - }; - char Deleter::ID = 0; -} - -/// MachineCodeDeletion Pass - This pass deletes all of the machine code for -/// the current function, which should happen after the function has been -/// emitted to a .s file or to memory. -FunctionPass *llvm::createMachineCodeDeleter() { - return new Deleter(); -} - - - //===---------------------------------------------------------------------===// // MachineFunction implementation //===---------------------------------------------------------------------===// +// Out of line virtual method. +MachineFunctionInfo::~MachineFunctionInfo() {} + void ilist_traits<MachineBasicBlock>::deleteNode(MachineBasicBlock *MBB) { MBB->getParent()->DeleteMachineBasicBlock(MBB); } -MachineFunction::MachineFunction(const Function *F, +MachineFunction::MachineFunction(Function *F, const TargetMachine &TM) - : Annotation(AnnotationManager::getID("CodeGen::MachineCodeForFunction")), - Fn(F), Target(TM) { + : Fn(F), Target(TM) { if (TM.getRegisterInfo()) RegInfo = new (Allocator.Allocate<MachineRegisterInfo>()) MachineRegisterInfo(*TM.getRegisterInfo()); @@ -131,7 +99,8 @@ MachineFunction::MachineFunction(const Function *F, const TargetData &TD = *TM.getTargetData(); bool IsPic = TM.getRelocationModel() == Reloc::PIC_; unsigned EntrySize = IsPic ? 4 : TD.getPointerSize(); - unsigned TyAlignment = IsPic ? 
TD.getABITypeAlignment(Type::Int32Ty) + unsigned TyAlignment = IsPic ? + TD.getABITypeAlignment(Type::getInt32Ty(F->getContext())) : TD.getPointerABIAlignment(); JumpTableInfo = new (Allocator.Allocate<MachineJumpTableInfo>()) MachineJumpTableInfo(EntrySize, TyAlignment); @@ -221,11 +190,6 @@ MachineFunction::CloneMachineInstr(const MachineInstr *Orig) { /// void MachineFunction::DeleteMachineInstr(MachineInstr *MI) { - // Clear the instructions memoperands. This must be done manually because - // the instruction's parent pointer is now null, so it can't properly - // deallocate them on its own. - MI->clearMemOperands(*this); - MI->~MachineInstr(); InstructionRecycler.Deallocate(Allocator, MI); } @@ -248,12 +212,99 @@ MachineFunction::DeleteMachineBasicBlock(MachineBasicBlock *MBB) { BasicBlockRecycler.Deallocate(Allocator, MBB); } +MachineMemOperand * +MachineFunction::getMachineMemOperand(const Value *v, unsigned f, + int64_t o, uint64_t s, + unsigned base_alignment) { + return new (Allocator.Allocate<MachineMemOperand>()) + MachineMemOperand(v, f, o, s, base_alignment); +} + +MachineMemOperand * +MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO, + int64_t Offset, uint64_t Size) { + return new (Allocator.Allocate<MachineMemOperand>()) + MachineMemOperand(MMO->getValue(), MMO->getFlags(), + int64_t(uint64_t(MMO->getOffset()) + + uint64_t(Offset)), + Size, MMO->getBaseAlignment()); +} + +MachineInstr::mmo_iterator +MachineFunction::allocateMemRefsArray(unsigned long Num) { + return Allocator.Allocate<MachineMemOperand *>(Num); +} + +std::pair<MachineInstr::mmo_iterator, MachineInstr::mmo_iterator> +MachineFunction::extractLoadMemRefs(MachineInstr::mmo_iterator Begin, + MachineInstr::mmo_iterator End) { + // Count the number of load mem refs. + unsigned Num = 0; + for (MachineInstr::mmo_iterator I = Begin; I != End; ++I) + if ((*I)->isLoad()) + ++Num; + + // Allocate a new array and populate it with the load information. 
+ MachineInstr::mmo_iterator Result = allocateMemRefsArray(Num); + unsigned Index = 0; + for (MachineInstr::mmo_iterator I = Begin; I != End; ++I) { + if ((*I)->isLoad()) { + if (!(*I)->isStore()) + // Reuse the MMO. + Result[Index] = *I; + else { + // Clone the MMO and unset the store flag. + MachineMemOperand *JustLoad = + getMachineMemOperand((*I)->getValue(), + (*I)->getFlags() & ~MachineMemOperand::MOStore, + (*I)->getOffset(), (*I)->getSize(), + (*I)->getBaseAlignment()); + Result[Index] = JustLoad; + } + ++Index; + } + } + return std::make_pair(Result, Result + Num); +} + +std::pair<MachineInstr::mmo_iterator, MachineInstr::mmo_iterator> +MachineFunction::extractStoreMemRefs(MachineInstr::mmo_iterator Begin, + MachineInstr::mmo_iterator End) { + // Count the number of store mem refs. + unsigned Num = 0; + for (MachineInstr::mmo_iterator I = Begin; I != End; ++I) + if ((*I)->isStore()) + ++Num; + + // Allocate a new array and populate it with the store information. + MachineInstr::mmo_iterator Result = allocateMemRefsArray(Num); + unsigned Index = 0; + for (MachineInstr::mmo_iterator I = Begin; I != End; ++I) { + if ((*I)->isStore()) { + if (!(*I)->isLoad()) + // Reuse the MMO. + Result[Index] = *I; + else { + // Clone the MMO and unset the load flag. 
+ MachineMemOperand *JustStore = + getMachineMemOperand((*I)->getValue(), + (*I)->getFlags() & ~MachineMemOperand::MOLoad, + (*I)->getOffset(), (*I)->getSize(), + (*I)->getBaseAlignment()); + Result[Index] = JustStore; + } + ++Index; + } + } + return std::make_pair(Result, Result + Num); +} + void MachineFunction::dump() const { - print(*cerr.stream()); + print(errs()); } -void MachineFunction::print(std::ostream &OS) const { - OS << "# Machine code for " << Fn->getName () << "():\n"; +void MachineFunction::print(raw_ostream &OS) const { + OS << "# Machine code for " << Fn->getName() << "():\n"; // Print Frame Information FrameInfo->print(*this, OS); @@ -262,10 +313,7 @@ void MachineFunction::print(std::ostream &OS) const { JumpTableInfo->print(OS); // Print Constant Pool - { - raw_os_ostream OSS(OS); - ConstantPool->print(OSS); - } + ConstantPool->print(OS); const TargetRegisterInfo *TRI = getTarget().getRegisterInfo(); @@ -279,32 +327,32 @@ void MachineFunction::print(std::ostream &OS) const { OS << " Reg #" << I->first; if (I->second) - OS << " in VR#" << I->second << " "; + OS << " in VR#" << I->second << ' '; } - OS << "\n"; + OS << '\n'; } if (RegInfo && !RegInfo->liveout_empty()) { OS << "Live Outs:"; for (MachineRegisterInfo::liveout_iterator I = RegInfo->liveout_begin(), E = RegInfo->liveout_end(); I != E; ++I) if (TRI) - OS << " " << TRI->getName(*I); + OS << ' ' << TRI->getName(*I); else OS << " Reg #" << *I; - OS << "\n"; + OS << '\n'; } - for (const_iterator BB = begin(); BB != end(); ++BB) + for (const_iterator BB = begin(), E = end(); BB != E; ++BB) BB->print(OS); - OS << "\n# End machine code for " << Fn->getName () << "().\n\n"; + OS << "\n# End machine code for " << Fn->getName() << "().\n\n"; } namespace llvm { template<> struct DOTGraphTraits<const MachineFunction*> : public DefaultDOTGraphTraits { static std::string getGraphName(const MachineFunction *F) { - return "CFG for '" + F->getFunction()->getName() + "' function"; + return "CFG for '" + 
F->getFunction()->getNameStr() + "' function"; } static std::string getNodeLabel(const MachineBasicBlock *Node, @@ -312,17 +360,18 @@ namespace llvm { bool ShortNames) { if (ShortNames && Node->getBasicBlock() && !Node->getBasicBlock()->getName().empty()) - return Node->getBasicBlock()->getName() + ":"; - - std::ostringstream Out; - if (ShortNames) { - Out << Node->getNumber() << ':'; - return Out.str(); + return Node->getBasicBlock()->getNameStr() + ":"; + + std::string OutStr; + { + raw_string_ostream OSS(OutStr); + + if (ShortNames) + OSS << Node->getNumber() << ':'; + else + Node->print(OSS); } - Node->print(Out); - - std::string OutStr = Out.str(); if (OutStr[0] == '\n') OutStr.erase(OutStr.begin()); // Process string output to make it nicer... @@ -339,59 +388,23 @@ namespace llvm { void MachineFunction::viewCFG() const { #ifndef NDEBUG - ViewGraph(this, "mf" + getFunction()->getName()); + ViewGraph(this, "mf" + getFunction()->getNameStr()); #else - cerr << "SelectionDAG::viewGraph is only available in debug builds on " - << "systems with Graphviz or gv!\n"; + errs() << "SelectionDAG::viewGraph is only available in debug builds on " + << "systems with Graphviz or gv!\n"; #endif // NDEBUG } void MachineFunction::viewCFGOnly() const { #ifndef NDEBUG - ViewGraph(this, "mf" + getFunction()->getName(), true); + ViewGraph(this, "mf" + getFunction()->getNameStr(), true); #else - cerr << "SelectionDAG::viewGraph is only available in debug builds on " - << "systems with Graphviz or gv!\n"; + errs() << "SelectionDAG::viewGraph is only available in debug builds on " + << "systems with Graphviz or gv!\n"; #endif // NDEBUG } -// The next two methods are used to construct and to retrieve -// the MachineCodeForFunction object for the given function. -// construct() -- Allocates and initializes for a given function and target -// get() -- Returns a handle to the object. -// This should not be called before "construct()" -// for a given Function. 
-// -MachineFunction& -MachineFunction::construct(const Function *Fn, const TargetMachine &Tar) -{ - AnnotationID MF_AID = - AnnotationManager::getID("CodeGen::MachineCodeForFunction"); - assert(Fn->getAnnotation(MF_AID) == 0 && - "Object already exists for this function!"); - MachineFunction* mcInfo = new MachineFunction(Fn, Tar); - Fn->addAnnotation(mcInfo); - return *mcInfo; -} - -void MachineFunction::destruct(const Function *Fn) { - AnnotationID MF_AID = - AnnotationManager::getID("CodeGen::MachineCodeForFunction"); - bool Deleted = Fn->deleteAnnotation(MF_AID); - assert(Deleted && "Machine code did not exist for function!"); - Deleted = Deleted; // silence warning when no assertions. -} - -MachineFunction& MachineFunction::get(const Function *F) -{ - AnnotationID MF_AID = - AnnotationManager::getID("CodeGen::MachineCodeForFunction"); - MachineFunction *mc = (MachineFunction*)F->getAnnotation(MF_AID); - assert(mc && "Call construct() method first to allocate the object"); - return *mc; -} - /// addLiveIn - Add the specified physical register as a live-in value and /// create a corresponding virtual register for it. unsigned MachineFunction::addLiveIn(unsigned PReg, @@ -402,23 +415,6 @@ unsigned MachineFunction::addLiveIn(unsigned PReg, return VReg; } -/// getOrCreateDebugLocID - Look up the DebugLocTuple index with the given -/// source file, line, and column. If none currently exists, create a new -/// DebugLocTuple, and insert it into the DebugIdMap. -unsigned MachineFunction::getOrCreateDebugLocID(GlobalVariable *CompileUnit, - unsigned Line, unsigned Col) { - DebugLocTuple Tuple(CompileUnit, Line, Col); - DenseMap<DebugLocTuple, unsigned>::iterator II - = DebugLocInfo.DebugIdMap.find(Tuple); - if (II != DebugLocInfo.DebugIdMap.end()) - return II->second; - // Add a new tuple. 
- unsigned Id = DebugLocInfo.DebugLocations.size(); - DebugLocInfo.DebugLocations.push_back(Tuple); - DebugLocInfo.DebugIdMap[Tuple] = Id; - return Id; -} - /// getDebugLocTuple - Get the DebugLocTuple for a given DebugLoc object. DebugLocTuple MachineFunction::getDebugLocTuple(DebugLoc DL) const { unsigned Idx = DL.getIndex(); @@ -444,7 +440,38 @@ int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset, } -void MachineFrameInfo::print(const MachineFunction &MF, std::ostream &OS) const{ +BitVector +MachineFrameInfo::getPristineRegs(const MachineBasicBlock *MBB) const { + assert(MBB && "MBB must be valid"); + const MachineFunction *MF = MBB->getParent(); + assert(MF && "MBB must be part of a MachineFunction"); + const TargetMachine &TM = MF->getTarget(); + const TargetRegisterInfo *TRI = TM.getRegisterInfo(); + BitVector BV(TRI->getNumRegs()); + + // Before CSI is calculated, no registers are considered pristine. They can be + // freely used and PEI will make sure they are saved. + if (!isCalleeSavedInfoValid()) + return BV; + + for (const unsigned *CSR = TRI->getCalleeSavedRegs(MF); CSR && *CSR; ++CSR) + BV.set(*CSR); + + // The entry MBB always has all CSRs pristine. + if (MBB == &MF->front()) + return BV; + + // On other MBBs the saved CSRs are not pristine. + const std::vector<CalleeSavedInfo> &CSI = getCalleeSavedInfo(); + for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(), + E = CSI.end(); I != E; ++I) + BV.reset(I->getReg()); + + return BV; +} + + +void MachineFrameInfo::print(const MachineFunction &MF, raw_ostream &OS) const{ const TargetFrameInfo *FI = MF.getTarget().getFrameInfo(); int ValOffset = (FI ? 
FI->getOffsetOfLocalArea() : 0); @@ -481,10 +508,9 @@ void MachineFrameInfo::print(const MachineFunction &MF, std::ostream &OS) const{ } void MachineFrameInfo::dump(const MachineFunction &MF) const { - print(MF, *cerr.stream()); + print(MF, errs()); } - //===----------------------------------------------------------------------===// // MachineJumpTableInfo implementation //===----------------------------------------------------------------------===// @@ -521,7 +547,7 @@ MachineJumpTableInfo::ReplaceMBBInJumpTables(MachineBasicBlock *Old, return MadeChange; } -void MachineJumpTableInfo::print(std::ostream &OS) const { +void MachineJumpTableInfo::print(raw_ostream &OS) const { // FIXME: this is lame, maybe we could print out the MBB numbers or something // like {1, 2, 4, 5, 3, 0} for (unsigned i = 0, e = JumpTables.size(); i != e; ++i) { @@ -530,7 +556,7 @@ void MachineJumpTableInfo::print(std::ostream &OS) const { } } -void MachineJumpTableInfo::dump() const { print(*cerr.stream()); } +void MachineJumpTableInfo::dump() const { print(errs()); } //===----------------------------------------------------------------------===// @@ -539,10 +565,17 @@ void MachineJumpTableInfo::dump() const { print(*cerr.stream()); } const Type *MachineConstantPoolEntry::getType() const { if (isMachineConstantPoolEntry()) - return Val.MachineCPVal->getType(); + return Val.MachineCPVal->getType(); return Val.ConstVal->getType(); } + +unsigned MachineConstantPoolEntry::getRelocationInfo() const { + if (isMachineConstantPoolEntry()) + return Val.MachineCPVal->getRelocationInfo(); + return Val.ConstVal->getRelocationInfo(); +} + MachineConstantPool::~MachineConstantPool() { for (unsigned i = 0, e = Constants.size(); i != e; ++i) if (Constants[i].isMachineConstantPoolEntry()) diff --git a/lib/CodeGen/MachineFunctionAnalysis.cpp b/lib/CodeGen/MachineFunctionAnalysis.cpp new file mode 100644 index 0000000..56294d9 --- /dev/null +++ b/lib/CodeGen/MachineFunctionAnalysis.cpp @@ -0,0 +1,50 @@ 
+//===-- MachineFunctionAnalysis.cpp ---------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the definitions of the MachineFunctionAnalysis members. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineFunctionAnalysis.h" +#include "llvm/CodeGen/MachineFunction.h" +using namespace llvm; + +// Register this pass with PassInfo directly to avoid having to define +// a default constructor. +static PassInfo +X("Machine Function Analysis", "machine-function-analysis", + intptr_t(&MachineFunctionAnalysis::ID), 0, + /*CFGOnly=*/false, /*is_analysis=*/true); + +char MachineFunctionAnalysis::ID = 0; + +MachineFunctionAnalysis::MachineFunctionAnalysis(TargetMachine &tm, + CodeGenOpt::Level OL) : + FunctionPass(&ID), TM(tm), OptLevel(OL), MF(0) { +} + +MachineFunctionAnalysis::~MachineFunctionAnalysis() { + releaseMemory(); + assert(!MF && "MachineFunctionAnalysis left initialized!"); +} + +bool MachineFunctionAnalysis::runOnFunction(Function &F) { + assert(!MF && "MachineFunctionAnalysis already initialized!"); + MF = new MachineFunction(&F, TM); + return false; +} + +void MachineFunctionAnalysis::releaseMemory() { + delete MF; + MF = 0; +} + +void MachineFunctionAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); +} diff --git a/lib/CodeGen/MachineFunctionPass.cpp b/lib/CodeGen/MachineFunctionPass.cpp new file mode 100644 index 0000000..2f8d4c9 --- /dev/null +++ b/lib/CodeGen/MachineFunctionPass.cpp @@ -0,0 +1,50 @@ +//===-- MachineFunctionPass.cpp -------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. 
See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the definitions of the MachineFunctionPass members. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Function.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/MachineFunctionAnalysis.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +using namespace llvm; + +bool MachineFunctionPass::runOnFunction(Function &F) { + // Do not codegen any 'available_externally' functions at all, they have + // definitions outside the translation unit. + if (F.hasAvailableExternallyLinkage()) + return false; + + MachineFunction &MF = getAnalysis<MachineFunctionAnalysis>().getMF(); + return runOnMachineFunction(MF); +} + +void MachineFunctionPass::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<MachineFunctionAnalysis>(); + AU.addPreserved<MachineFunctionAnalysis>(); + + // MachineFunctionPass preserves all LLVM IR passes, but there's no + // high-level way to express this. Instead, just list a bunch of + // passes explicitly. This does not include setPreservesCFG, + // because CodeGen overloads that to mean preserving the MachineBasicBlock + // CFG in addition to the LLVM IR CFG. 
+ AU.addPreserved<AliasAnalysis>(); + AU.addPreserved("scalar-evolution"); + AU.addPreserved("iv-users"); + AU.addPreserved("memdep"); + AU.addPreserved("live-values"); + AU.addPreserved("domtree"); + AU.addPreserved("domfrontier"); + AU.addPreserved("loops"); + AU.addPreserved("lda"); + + FunctionPass::getAnalysisUsage(AU); +} diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index d44305f..cbe5c7c 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -15,17 +15,20 @@ #include "llvm/Constants.h" #include "llvm/InlineAsm.h" #include "llvm/Value.h" +#include "llvm/Assembly/Writer.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetInstrDesc.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/DebugInfo.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/LeakDetector.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/Streams.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/FoldingSet.h" using namespace llvm; @@ -156,7 +159,7 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const { return false; switch (getType()) { - default: assert(0 && "Unrecognized operand type"); + default: llvm_unreachable("Unrecognized operand type"); case MachineOperand::MO_Register: return getReg() == Other.getReg() && isDef() == Other.isDef() && getSubReg() == Other.getSubReg(); @@ -182,11 +185,6 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const { /// print - Print the specified machine operand. 
/// -void MachineOperand::print(std::ostream &OS, const TargetMachine *TM) const { - raw_os_ostream RawOS(OS); - print(RawOS, TM); -} - void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const { switch (getType()) { case MachineOperand::MO_Register: @@ -242,7 +240,7 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const { OS << getImm(); break; case MachineOperand::MO_FPImmediate: - if (getFPImm()->getType() == Type::FloatTy) + if (getFPImm()->getType()->isFloatTy()) OS << getFPImm()->getValueAPF().convertToFloat(); else OS << getFPImm()->getValueAPF().convertToDouble(); @@ -274,7 +272,7 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const { OS << '>'; break; default: - assert(0 && "Unrecognized operand type"); + llvm_unreachable("Unrecognized operand type"); } if (unsigned TF = getTargetFlags()) @@ -289,7 +287,7 @@ MachineMemOperand::MachineMemOperand(const Value *v, unsigned int f, int64_t o, uint64_t s, unsigned int a) : Offset(o), Size(s), V(v), Flags((f & 7) | ((Log2_32(a) + 1) << 3)) { - assert(isPowerOf2_32(a) && "Alignment is not a power of 2!"); + assert(getBaseAlignment() == a && "Alignment is not a power of 2!"); assert((isLoad() || isStore()) && "Not a load/store!"); } @@ -302,6 +300,66 @@ void MachineMemOperand::Profile(FoldingSetNodeID &ID) const { ID.AddInteger(Flags); } +void MachineMemOperand::refineAlignment(const MachineMemOperand *MMO) { + // The Value and Offset may differ due to CSE. But the flags and size + // should be the same. + assert(MMO->getFlags() == getFlags() && "Flags mismatch!"); + assert(MMO->getSize() == getSize() && "Size mismatch!"); + + if (MMO->getBaseAlignment() >= getBaseAlignment()) { + // Update the alignment value. + Flags = (Flags & 7) | ((Log2_32(MMO->getBaseAlignment()) + 1) << 3); + // Also update the base and offset, because the new alignment may + // not be applicable with the old ones. 
+ V = MMO->getValue(); + Offset = MMO->getOffset(); + } +} + +/// getAlignment - Return the minimum known alignment in bytes of the +/// actual memory reference. +uint64_t MachineMemOperand::getAlignment() const { + return MinAlign(getBaseAlignment(), getOffset()); +} + +raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineMemOperand &MMO) { + assert((MMO.isLoad() || MMO.isStore()) && + "SV has to be a load, store or both."); + + if (MMO.isVolatile()) + OS << "Volatile "; + + if (MMO.isLoad()) + OS << "LD"; + if (MMO.isStore()) + OS << "ST"; + OS << MMO.getSize(); + + // Print the address information. + OS << "["; + if (!MMO.getValue()) + OS << "<unknown>"; + else + WriteAsOperand(OS, MMO.getValue(), /*PrintType=*/false); + + // If the alignment of the memory reference itself differs from the alignment + // of the base pointer, print the base alignment explicitly, next to the base + // pointer. + if (MMO.getBaseAlignment() != MMO.getAlignment()) + OS << "(align=" << MMO.getBaseAlignment() << ")"; + + if (MMO.getOffset() != 0) + OS << "+" << MMO.getOffset(); + OS << "]"; + + // Print the alignment of the reference. + if (MMO.getBaseAlignment() != MMO.getAlignment() || + MMO.getBaseAlignment() != MMO.getSize()) + OS << "(align=" << MMO.getAlignment() << ")"; + + return OS; +} + //===----------------------------------------------------------------------===// // MachineInstr Implementation //===----------------------------------------------------------------------===// @@ -309,7 +367,8 @@ void MachineMemOperand::Profile(FoldingSetNodeID &ID) const { /// MachineInstr ctor - This constructor creates a dummy MachineInstr with /// TID NULL and no operands. 
MachineInstr::MachineInstr() - : TID(0), NumImplicitOps(0), Parent(0), debugLoc(DebugLoc::getUnknownLoc()) { + : TID(0), NumImplicitOps(0), MemRefs(0), MemRefsEnd(0), + Parent(0), debugLoc(DebugLoc::getUnknownLoc()) { // Make sure that we get added to a machine basicblock LeakDetector::addGarbageObject(this); } @@ -328,7 +387,7 @@ void MachineInstr::addImplicitDefUseOperands() { /// TargetInstrDesc or the numOperands if it is not zero. (for /// instructions with variable number of operands). MachineInstr::MachineInstr(const TargetInstrDesc &tid, bool NoImp) - : TID(&tid), NumImplicitOps(0), Parent(0), + : TID(&tid), NumImplicitOps(0), MemRefs(0), MemRefsEnd(0), Parent(0), debugLoc(DebugLoc::getUnknownLoc()) { if (!NoImp && TID->getImplicitDefs()) for (const unsigned *ImpDefs = TID->getImplicitDefs(); *ImpDefs; ++ImpDefs) @@ -346,7 +405,8 @@ MachineInstr::MachineInstr(const TargetInstrDesc &tid, bool NoImp) /// MachineInstr ctor - As above, but with a DebugLoc. MachineInstr::MachineInstr(const TargetInstrDesc &tid, const DebugLoc dl, bool NoImp) - : TID(&tid), NumImplicitOps(0), Parent(0), debugLoc(dl) { + : TID(&tid), NumImplicitOps(0), MemRefs(0), MemRefsEnd(0), + Parent(0), debugLoc(dl) { if (!NoImp && TID->getImplicitDefs()) for (const unsigned *ImpDefs = TID->getImplicitDefs(); *ImpDefs; ++ImpDefs) NumImplicitOps++; @@ -365,7 +425,7 @@ MachineInstr::MachineInstr(const TargetInstrDesc &tid, const DebugLoc dl, /// basic block. 
/// MachineInstr::MachineInstr(MachineBasicBlock *MBB, const TargetInstrDesc &tid) - : TID(&tid), NumImplicitOps(0), Parent(0), + : TID(&tid), NumImplicitOps(0), MemRefs(0), MemRefsEnd(0), Parent(0), debugLoc(DebugLoc::getUnknownLoc()) { assert(MBB && "Cannot use inserting ctor with null basic block!"); if (TID->ImplicitDefs) @@ -385,7 +445,8 @@ MachineInstr::MachineInstr(MachineBasicBlock *MBB, const TargetInstrDesc &tid) /// MachineInstr::MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl, const TargetInstrDesc &tid) - : TID(&tid), NumImplicitOps(0), Parent(0), debugLoc(dl) { + : TID(&tid), NumImplicitOps(0), MemRefs(0), MemRefsEnd(0), + Parent(0), debugLoc(dl) { assert(MBB && "Cannot use inserting ctor with null basic block!"); if (TID->ImplicitDefs) for (const unsigned *ImpDefs = TID->getImplicitDefs(); *ImpDefs; ++ImpDefs) @@ -403,8 +464,9 @@ MachineInstr::MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl, /// MachineInstr ctor - Copies MachineInstr arg exactly /// MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI) - : TID(&MI.getDesc()), NumImplicitOps(0), Parent(0), - debugLoc(MI.getDebugLoc()) { + : TID(&MI.getDesc()), NumImplicitOps(0), + MemRefs(MI.MemRefs), MemRefsEnd(MI.MemRefsEnd), + Parent(0), debugLoc(MI.getDebugLoc()) { Operands.reserve(MI.getNumOperands()); // Add operands @@ -412,11 +474,6 @@ MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI) addOperand(MI.getOperand(i)); NumImplicitOps = MI.NumImplicitOps; - // Add memory operands. - for (std::list<MachineMemOperand>::const_iterator i = MI.memoperands_begin(), - j = MI.memoperands_end(); i != j; ++i) - addMemOperand(MF, *i); - // Set parent to null. 
Parent = 0; @@ -425,8 +482,6 @@ MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI) MachineInstr::~MachineInstr() { LeakDetector::removeGarbageObject(this); - assert(MemOperands.empty() && - "MachineInstr being deleted with live memoperands!"); #ifndef NDEBUG for (unsigned i = 0, e = Operands.size(); i != e; ++i) { assert(Operands[i].ParentMI == this && "ParentMI mismatch!"); @@ -587,18 +642,24 @@ void MachineInstr::RemoveOperand(unsigned OpNo) { } } -/// addMemOperand - Add a MachineMemOperand to the machine instruction, -/// referencing arbitrary storage. +/// addMemOperand - Add a MachineMemOperand to the machine instruction. +/// This function should be used only occasionally. The setMemRefs function +/// is the primary method for setting up a MachineInstr's MemRefs list. void MachineInstr::addMemOperand(MachineFunction &MF, - const MachineMemOperand &MO) { - MemOperands.push_back(MO); -} + MachineMemOperand *MO) { + mmo_iterator OldMemRefs = MemRefs; + mmo_iterator OldMemRefsEnd = MemRefsEnd; -/// clearMemOperands - Erase all of this MachineInstr's MachineMemOperands. -void MachineInstr::clearMemOperands(MachineFunction &MF) { - MemOperands.clear(); -} + size_t NewNum = (MemRefsEnd - MemRefs) + 1; + mmo_iterator NewMemRefs = MF.allocateMemRefsArray(NewNum); + mmo_iterator NewMemRefsEnd = NewMemRefs + NewNum; + + std::copy(OldMemRefs, OldMemRefsEnd, NewMemRefs); + NewMemRefs[NewNum - 1] = MO; + MemRefs = NewMemRefs; + MemRefsEnd = NewMemRefsEnd; +} /// removeFromParent - This method unlinks 'this' from the containing basic /// block, and returns it, but does not delete it. @@ -657,7 +718,7 @@ bool MachineInstr::isDebugLabel() const { } /// findRegisterUseOperandIdx() - Returns the MachineOperand that is a use of -/// the specific register or -1 if it is not found. It further tightening +/// the specific register or -1 if it is not found. It further tightens /// the search criteria to a use that kills the register if isKill is true. 
int MachineInstr::findRegisterUseOperandIdx(unsigned Reg, bool isKill, const TargetRegisterInfo *TRI) const { @@ -731,7 +792,9 @@ isRegTiedToUseOperand(unsigned DefOpIdx, unsigned *UseOpIdx) const { unsigned DefPart = 0; for (unsigned i = 1, e = getNumOperands(); i < e; ) { const MachineOperand &FMO = getOperand(i); - assert(FMO.isImm()); + // After the normal asm operands there may be additional imp-def regs. + if (!FMO.isImm()) + return false; // Skip over this def. unsigned NumOps = InlineAsm::getNumOperandRegisters(FMO.getImm()); unsigned PrevDef = i + 1; @@ -782,16 +845,22 @@ isRegTiedToDefOperand(unsigned UseOpIdx, unsigned *DefOpIdx) const { const MachineOperand &MO = getOperand(UseOpIdx); if (!MO.isReg() || !MO.isUse() || MO.getReg() == 0) return false; - int FlagIdx = UseOpIdx - 1; - if (FlagIdx < 1) - return false; - while (!getOperand(FlagIdx).isImm()) { - if (--FlagIdx == 0) + + // Find the flag operand corresponding to UseOpIdx + unsigned FlagIdx, NumOps=0; + for (FlagIdx = 1; FlagIdx < UseOpIdx; FlagIdx += NumOps+1) { + const MachineOperand &UFMO = getOperand(FlagIdx); + // After the normal asm operands there may be additional imp-def regs. + if (!UFMO.isImm()) return false; + NumOps = InlineAsm::getNumOperandRegisters(UFMO.getImm()); + assert(NumOps < getNumOperands() && "Invalid inline asm flag"); + if (UseOpIdx < FlagIdx+NumOps+1) + break; } - const MachineOperand &UFMO = getOperand(FlagIdx); - if (FlagIdx + InlineAsm::getNumOperandRegisters(UFMO.getImm()) < UseOpIdx) + if (FlagIdx >= UseOpIdx) return false; + const MachineOperand &UFMO = getOperand(FlagIdx); unsigned DefNo; if (InlineAsm::isUseOperandTiedToDef(UFMO.getImm(), DefNo)) { if (!DefOpIdx) @@ -864,7 +933,8 @@ void MachineInstr::copyPredicates(const MachineInstr *MI) { /// SawStore is set to true, it means that there is a store (or call) between /// the instruction's location and its intended destination. 
bool MachineInstr::isSafeToMove(const TargetInstrInfo *TII, - bool &SawStore) const { + bool &SawStore, + AliasAnalysis *AA) const { // Ignore stuff that we obviously can't move. if (TID->mayStore() || TID->isCall()) { SawStore = true; @@ -878,9 +948,9 @@ bool MachineInstr::isSafeToMove(const TargetInstrInfo *TII, // destination. The check for isInvariantLoad gives the targe the chance to // classify the load as always returning a constant, e.g. a constant pool // load. - if (TID->mayLoad() && !TII->isInvariantLoad(this)) + if (TID->mayLoad() && !isInvariantLoad(AA)) // Otherwise, this is a real load. If there is a store between the load and - // end of block, or if the laod is volatile, we can't move it. + // end of block, or if the load is volatile, we can't move it. return !SawStore && !hasVolatileMemoryRef(); return true; @@ -889,11 +959,11 @@ bool MachineInstr::isSafeToMove(const TargetInstrInfo *TII, /// isSafeToReMat - Return true if it's safe to rematerialize the specified /// instruction which defined the specified register instead of copying it. bool MachineInstr::isSafeToReMat(const TargetInstrInfo *TII, - unsigned DstReg) const { + unsigned DstReg, + AliasAnalysis *AA) const { bool SawStore = false; - if (!getDesc().isRematerializable() || - !TII->isTriviallyReMaterializable(this) || - !isSafeToMove(TII, SawStore)) + if (!TII->isTriviallyReMaterializable(this, AA) || + !isSafeToMove(TII, SawStore, AA)) return false; for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { const MachineOperand &MO = getOperand(i); @@ -930,21 +1000,55 @@ bool MachineInstr::hasVolatileMemoryRef() const { return true; // Check the memory reference information for volatile references. 
- for (std::list<MachineMemOperand>::const_iterator I = memoperands_begin(), - E = memoperands_end(); I != E; ++I) - if (I->isVolatile()) + for (mmo_iterator I = memoperands_begin(), E = memoperands_end(); I != E; ++I) + if ((*I)->isVolatile()) return true; return false; } -void MachineInstr::dump() const { - cerr << " " << *this; +/// isInvariantLoad - Return true if this instruction is loading from a +/// location whose value is invariant across the function. For example, +/// loading a value from the constant pool or from from the argument area +/// of a function if it does not change. This should only return true of +/// *all* loads the instruction does are invariant (if it does multiple loads). +bool MachineInstr::isInvariantLoad(AliasAnalysis *AA) const { + // If the instruction doesn't load at all, it isn't an invariant load. + if (!TID->mayLoad()) + return false; + + // If the instruction has lost its memoperands, conservatively assume that + // it may not be an invariant load. + if (memoperands_empty()) + return false; + + const MachineFrameInfo *MFI = getParent()->getParent()->getFrameInfo(); + + for (mmo_iterator I = memoperands_begin(), + E = memoperands_end(); I != E; ++I) { + if ((*I)->isVolatile()) return false; + if ((*I)->isStore()) return false; + + if (const Value *V = (*I)->getValue()) { + // A load from a constant PseudoSourceValue is invariant. + if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) + if (PSV->isConstant(MFI)) + continue; + // If we have an AliasAnalysis, ask it whether the memory is constant. + if (AA && AA->pointsToConstantMemory(V)) + continue; + } + + // Otherwise assume conservatively. + return false; + } + + // Everything checks out. 
+ return true; } -void MachineInstr::print(std::ostream &OS, const TargetMachine *TM) const { - raw_os_ostream RawOS(OS); - print(RawOS, TM); +void MachineInstr::dump() const { + errs() << " " << *this; } void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const { @@ -967,46 +1071,23 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const { if (!memoperands_empty()) { OS << ", Mem:"; - for (std::list<MachineMemOperand>::const_iterator i = memoperands_begin(), - e = memoperands_end(); i != e; ++i) { - const MachineMemOperand &MRO = *i; - const Value *V = MRO.getValue(); - - assert((MRO.isLoad() || MRO.isStore()) && - "SV has to be a load, store or both."); - - if (MRO.isVolatile()) - OS << "Volatile "; - - if (MRO.isLoad()) - OS << "LD"; - if (MRO.isStore()) - OS << "ST"; - - OS << "(" << MRO.getSize() << "," << MRO.getAlignment() << ") ["; - - if (!V) - OS << "<unknown>"; - else if (!V->getName().empty()) - OS << V->getName(); - else if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) { - PSV->print(OS); - } else - OS << V; - - OS << " + " << MRO.getOffset() << "]"; + for (mmo_iterator i = memoperands_begin(), e = memoperands_end(); + i != e; ++i) { + OS << **i; + if (next(i) != e) + OS << " "; } } if (!debugLoc.isUnknown()) { const MachineFunction *MF = getParent()->getParent(); DebugLocTuple DLT = MF->getDebugLocTuple(debugLoc); - DICompileUnit CU(DLT.CompileUnit); - std::string Dir, Fn; - OS << " [dbg: " - << CU.getDirectory(Dir) << '/' << CU.getFilename(Fn) << "," - << DLT.Line << "," - << DLT.Col << "]"; + DICompileUnit CU(DLT.Scope); + if (!CU.isNull()) + OS << " [dbg: " + << CU.getDirectory() << '/' << CU.getFilename() << "," + << DLT.Line << "," + << DLT.Col << "]"; } OS << "\n"; @@ -1021,7 +1102,7 @@ bool MachineInstr::addRegisterKilled(unsigned IncomingReg, SmallVector<unsigned,4> DeadOps; for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { MachineOperand &MO = getOperand(i); - if (!MO.isReg() || 
!MO.isUse()) + if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue; unsigned Reg = MO.getReg(); if (!Reg) @@ -1032,6 +1113,9 @@ bool MachineInstr::addRegisterKilled(unsigned IncomingReg, if (MO.isKill()) // The register is already marked kill. return true; + if (isPhysReg && isRegTiedToDefOperand(i)) + // Two-address uses of physregs must not be marked kill. + return true; MO.setIsKill(); Found = true; } diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp index aaa4de4..f92ddb2 100644 --- a/lib/CodeGen/MachineLICM.cpp +++ b/lib/CodeGen/MachineLICM.cpp @@ -28,11 +28,12 @@ #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -43,8 +44,11 @@ namespace { class VISIBILITY_HIDDEN MachineLICM : public MachineFunctionPass { const TargetMachine *TM; const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + BitVector AllocatableSet; // Various analyses that we use... + AliasAnalysis *AA; // Alias analysis info. MachineLoopInfo *LI; // Current MachineLoopInfo MachineDominatorTree *DT; // Machine dominator tree for the cur loop MachineRegisterInfo *RegInfo; // Machine register information @@ -70,6 +74,7 @@ namespace { AU.setPreservesCFG(); AU.addRequired<MachineLoopInfo>(); AU.addRequired<MachineDominatorTree>(); + AU.addRequired<AliasAnalysis>(); AU.addPreserved<MachineLoopInfo>(); AU.addPreserved<MachineDominatorTree>(); MachineFunctionPass::getAnalysisUsage(AU); @@ -126,20 +131,19 @@ static bool LoopIsOuterMostWithPreheader(MachineLoop *CurLoop) { /// loop. 
/// bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { - const Function *F = MF.getFunction(); - if (F->hasFnAttr(Attribute::OptimizeForSize)) - return false; - - DOUT << "******** Machine LICM ********\n"; + DEBUG(errs() << "******** Machine LICM ********\n"); Changed = false; TM = &MF.getTarget(); TII = TM->getInstrInfo(); + TRI = TM->getRegisterInfo(); RegInfo = &MF.getRegInfo(); + AllocatableSet = TRI->getAllocatableSet(MF); // Get our Loop information... LI = &getAnalysis<MachineLoopInfo>(); DT = &getAnalysis<MachineDominatorTree>(); + AA = &getAnalysis<AliasAnalysis>(); for (MachineLoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) { @@ -210,7 +214,7 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) { // Okay, this instruction does a load. As a refinement, we allow the target // to decide whether the loaded value is actually a constant. If so, we can // actually use it as a load. - if (!TII->isInvariantLoad(&I)) + if (!I.isInvariantLoad(AA)) // FIXME: we should be able to sink loads with no other side effects if // there is nothing that can change memory from here until the end of // block. This is a trivial form of alias analysis. 
@@ -218,28 +222,28 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) { } DEBUG({ - DOUT << "--- Checking if we can hoist " << I; + errs() << "--- Checking if we can hoist " << I; if (I.getDesc().getImplicitUses()) { - DOUT << " * Instruction has implicit uses:\n"; + errs() << " * Instruction has implicit uses:\n"; const TargetRegisterInfo *TRI = TM->getRegisterInfo(); for (const unsigned *ImpUses = I.getDesc().getImplicitUses(); *ImpUses; ++ImpUses) - DOUT << " -> " << TRI->getName(*ImpUses) << "\n"; + errs() << " -> " << TRI->getName(*ImpUses) << "\n"; } if (I.getDesc().getImplicitDefs()) { - DOUT << " * Instruction has implicit defines:\n"; + errs() << " * Instruction has implicit defines:\n"; const TargetRegisterInfo *TRI = TM->getRegisterInfo(); for (const unsigned *ImpDefs = I.getDesc().getImplicitDefs(); *ImpDefs; ++ImpDefs) - DOUT << " -> " << TRI->getName(*ImpDefs) << "\n"; + errs() << " -> " << TRI->getName(*ImpDefs) << "\n"; } }); if (I.getDesc().getImplicitDefs() || I.getDesc().getImplicitUses()) { - DOUT << "Cannot hoist with implicit defines or uses\n"; + DEBUG(errs() << "Cannot hoist with implicit defines or uses\n"); return false; } @@ -254,8 +258,30 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) { if (Reg == 0) continue; // Don't hoist an instruction that uses or defines a physical register. - if (TargetRegisterInfo::isPhysicalRegister(Reg)) - return false; + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + if (MO.isUse()) { + // If the physreg has no defs anywhere, it's just an ambient register + // and we can freely move its uses. Alternatively, if it's allocatable, + // it could get allocated to something with a def during allocation. + if (!RegInfo->def_empty(Reg)) + return false; + if (AllocatableSet.test(Reg)) + return false; + // Check for a def among the register's aliases too. 
+ for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { + unsigned AliasReg = *Alias; + if (!RegInfo->def_empty(AliasReg)) + return false; + if (AllocatableSet.test(AliasReg)) + return false; + } + // Otherwise it's safe to move. + continue; + } else if (!MO.isDead()) { + // A def that isn't dead. We can't move it. + return false; + } + } if (!MO.isUse()) continue; @@ -291,13 +317,10 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) { if (MI.getOpcode() == TargetInstrInfo::IMPLICIT_DEF) return false; - const TargetInstrDesc &TID = MI.getDesc(); - // FIXME: For now, only hoist re-materilizable instructions. LICM will // increase register pressure. We want to make sure it doesn't increase // spilling. - if (!TID.mayLoad() && (!TID.isRematerializable() || - !TII->isTriviallyReMaterializable(&MI))) + if (!TII->isTriviallyReMaterializable(&MI, AA)) return false; // If result(s) of this instruction is used by PHIs, then don't hoist it. @@ -355,14 +378,14 @@ void MachineLICM::Hoist(MachineInstr &MI) { // Now move the instructions to the predecessor, inserting it before any // terminator instructions. DEBUG({ - DOUT << "Hoisting " << MI; + errs() << "Hoisting " << MI; if (CurPreheader->getBasicBlock()) - DOUT << " to MachineBasicBlock " - << CurPreheader->getBasicBlock()->getName(); + errs() << " to MachineBasicBlock " + << CurPreheader->getBasicBlock()->getName(); if (MI.getParent()->getBasicBlock()) - DOUT << " from MachineBasicBlock " - << MI.getParent()->getBasicBlock()->getName(); - DOUT << "\n"; + errs() << " from MachineBasicBlock " + << MI.getParent()->getBasicBlock()->getName(); + errs() << "\n"; }); // Look for opportunity to CSE the hoisted instruction. 
@@ -374,8 +397,7 @@ void MachineLICM::Hoist(MachineInstr &MI) { if (CI != CSEMap.end()) { const MachineInstr *Dup = LookForDuplicate(&MI, CI->second, RegInfo); if (Dup) { - DOUT << "CSEing " << MI; - DOUT << " with " << *Dup; + DEBUG(errs() << "CSEing " << MI << " with " << *Dup); for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI.getOperand(i); if (MO.isReg() && MO.isDef()) diff --git a/lib/CodeGen/MachineLoopInfo.cpp b/lib/CodeGen/MachineLoopInfo.cpp index ff56f4d..2da8e37 100644 --- a/lib/CodeGen/MachineLoopInfo.cpp +++ b/lib/CodeGen/MachineLoopInfo.cpp @@ -19,8 +19,12 @@ #include "llvm/CodeGen/Passes.h" using namespace llvm; -TEMPLATE_INSTANTIATION(class LoopBase<MachineBasicBlock>); -TEMPLATE_INSTANTIATION(class LoopInfoBase<MachineBasicBlock>); +#define MLB class LoopBase<MachineBasicBlock, MachineLoop> +TEMPLATE_INSTANTIATION(MLB); +#undef MLB +#define MLIB class LoopInfoBase<MachineBasicBlock, MachineLoop> +TEMPLATE_INSTANTIATION(MLIB); +#undef MLIB char MachineLoopInfo::ID = 0; static RegisterPass<MachineLoopInfo> @@ -37,4 +41,5 @@ bool MachineLoopInfo::runOnMachineFunction(MachineFunction &) { void MachineLoopInfo::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); AU.addRequired<MachineDominatorTree>(); + MachineFunctionPass::getAnalysisUsage(AU); } diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp index 1d8109e..b62803f 100644 --- a/lib/CodeGen/MachineModuleInfo.cpp +++ b/lib/CodeGen/MachineModuleInfo.cpp @@ -23,7 +23,7 @@ #include "llvm/Instructions.h" #include "llvm/Module.h" #include "llvm/Support/Dwarf.h" -#include "llvm/Support/Streams.h" +#include "llvm/Support/ErrorHandling.h" using namespace llvm; using namespace llvm::dwarf; @@ -32,23 +32,23 @@ static RegisterPass<MachineModuleInfo> X("machinemoduleinfo", "Module Information"); char MachineModuleInfo::ID = 0; +// Out of line virtual method. 
+MachineModuleInfoImpl::~MachineModuleInfoImpl() {} + //===----------------------------------------------------------------------===// - + MachineModuleInfo::MachineModuleInfo() : ImmutablePass(&ID) -, LabelIDList() -, FrameMoves() -, LandingPads() -, Personalities() +, ObjFileMMI(0) , CallsEHReturn(0) , CallsUnwindInit(0) -, DbgInfoAvailable(false) -{ - // Always emit "no personality" info +, DbgInfoAvailable(false) { + // Always emit some info, by default "no personality" info. Personalities.push_back(NULL); } -MachineModuleInfo::~MachineModuleInfo() { +MachineModuleInfo::~MachineModuleInfo() { + delete ObjFileMMI; } /// doInitialization - Initialize the state for a new module. @@ -63,18 +63,12 @@ bool MachineModuleInfo::doFinalization() { return false; } -/// BeginFunction - Begin gathering function meta information. -/// -void MachineModuleInfo::BeginFunction(MachineFunction *MF) { - // Coming soon. -} - /// EndFunction - Discard function meta information. /// void MachineModuleInfo::EndFunction() { // Clean up frame info. FrameMoves.clear(); - + // Clean up exception info. LandingPads.clear(); TypeInfos.clear(); @@ -82,12 +76,16 @@ void MachineModuleInfo::EndFunction() { FilterEnds.clear(); CallsEHReturn = 0; CallsUnwindInit = 0; +#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN + VariableDbgInfo.clear(); +#endif } /// AnalyzeModule - Scan the module for global debug information. /// void MachineModuleInfo::AnalyzeModule(Module &M) { - // Insert functions in the llvm.used array into UsedFunctions. + // Insert functions in the llvm.used array (but not llvm.compiler.used) into + // UsedFunctions. 
GlobalVariable *GV = M.getGlobalVariable("llvm.used"); if (!GV || !GV->hasInitializer()) return; @@ -95,12 +93,10 @@ void MachineModuleInfo::AnalyzeModule(Module &M) { ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer()); if (InitList == 0) return; - for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) { - if (ConstantExpr *CE = dyn_cast<ConstantExpr>(InitList->getOperand(i))) - if (CE->getOpcode() == Instruction::BitCast) - if (Function *F = dyn_cast<Function>(CE->getOperand(0))) - UsedFunctions.insert(F); - } + for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) + if (Function *F = + dyn_cast<Function>(InitList->getOperand(i)->stripPointerCasts())) + UsedFunctions.insert(F); } //===-EH-------------------------------------------------------------------===// @@ -115,7 +111,7 @@ LandingPadInfo &MachineModuleInfo::getOrCreateLandingPadInfo if (LP.LandingPadBlock == LandingPad) return LP; } - + LandingPads.push_back(LandingPadInfo(LandingPad)); return LandingPads[N]; } @@ -134,7 +130,7 @@ void MachineModuleInfo::addInvoke(MachineBasicBlock *LandingPad, unsigned MachineModuleInfo::addLandingPad(MachineBasicBlock *LandingPad) { unsigned LandingPadLabel = NextLabelID(); LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); - LP.LandingPadLabel = LandingPadLabel; + LP.LandingPadLabel = LandingPadLabel; return LandingPadLabel; } @@ -148,8 +144,13 @@ void MachineModuleInfo::addPersonality(MachineBasicBlock *LandingPad, for (unsigned i = 0; i < Personalities.size(); ++i) if (Personalities[i] == Personality) return; - - Personalities.push_back(Personality); + + // If this is the first personality we're adding go + // ahead and add it at the beginning. + if (Personalities[0] == NULL) + Personalities[0] = Personality; + else + Personalities.push_back(Personality); } /// addCatchTypeInfo - Provide the catch typeinfo for a landing pad. 
@@ -224,7 +225,7 @@ void MachineModuleInfo::TidyLandingPads() { } } -/// getTypeIDFor - Return the type id for the specified typeinfo. This is +/// getTypeIDFor - Return the type id for the specified typeinfo. This is /// function wide. unsigned MachineModuleInfo::getTypeIDFor(GlobalVariable *TI) { for (unsigned i = 0, N = TypeInfos.size(); i != N; ++i) @@ -273,24 +274,24 @@ Function *MachineModuleInfo::getPersonality() const { } /// getPersonalityIndex - Return unique index for current personality -/// function. NULL personality function should always get zero index. +/// function. NULL/first personality function should always get zero index. unsigned MachineModuleInfo::getPersonalityIndex() const { const Function* Personality = NULL; - + // Scan landing pads. If there is at least one non-NULL personality - use it. for (unsigned i = 0; i != LandingPads.size(); ++i) if (LandingPads[i].Personality) { Personality = LandingPads[i].Personality; break; } - + for (unsigned i = 0; i < Personalities.size(); ++i) { if (Personalities[i] == Personality) return i; } - // This should never happen - assert(0 && "Personality function should be set!"); + // This will happen if the current personality function is + // in the zero index. return 0; } @@ -306,6 +307,7 @@ struct DebugLabelFolder : public MachineFunctionPass { DebugLabelFolder() : MachineFunctionPass(&ID) {} virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); AU.addPreservedID(MachineLoopInfoID); AU.addPreservedID(MachineDominatorsID); MachineFunctionPass::getAnalysisUsage(AU); @@ -321,12 +323,12 @@ bool DebugLabelFolder::runOnMachineFunction(MachineFunction &MF) { // Get machine module info. MachineModuleInfo *MMI = getAnalysisIfAvailable<MachineModuleInfo>(); if (!MMI) return false; - + // Track if change is made. bool MadeChange = false; // No prior label to begin. unsigned PriorLabel = 0; - + // Iterate through basic blocks. 
for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB) { @@ -336,7 +338,7 @@ bool DebugLabelFolder::runOnMachineFunction(MachineFunction &MF) { if (I->isDebugLabel() && !MMI->isDbgLabelUsed(I->getOperand(0).getImm())){ // The label ID # is always operand #0, an immediate. unsigned NextLabel = I->getOperand(0).getImm(); - + // If there was an immediate prior label. if (PriorLabel) { // Remap the current label to prior label. @@ -354,15 +356,14 @@ bool DebugLabelFolder::runOnMachineFunction(MachineFunction &MF) { // No consecutive labels. PriorLabel = 0; } - + ++I; } } - + return MadeChange; } FunctionPass *createDebugLabelFoldingPass() { return new DebugLabelFolder(); } } - diff --git a/lib/CodeGen/MachineModuleInfoImpls.cpp b/lib/CodeGen/MachineModuleInfoImpls.cpp new file mode 100644 index 0000000..7a62929 --- /dev/null +++ b/lib/CodeGen/MachineModuleInfoImpls.cpp @@ -0,0 +1,45 @@ +//===-- llvm/CodeGen/MachineModuleInfoImpls.cpp ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements object-file format specific implementations of +// MachineModuleInfoImpl. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineModuleInfoImpls.h" +#include "llvm/MC/MCSymbol.h" +using namespace llvm; + +//===----------------------------------------------------------------------===// +// MachineModuleInfoMachO +//===----------------------------------------------------------------------===// + +// Out of line virtual method. 
+void MachineModuleInfoMachO::Anchor() {} + + +static int SortSymbolPair(const void *LHS, const void *RHS) { + const MCSymbol *LHSS = + ((const std::pair<const MCSymbol*, const MCSymbol*>*)LHS)->first; + const MCSymbol *RHSS = + ((const std::pair<const MCSymbol*, const MCSymbol*>*)RHS)->first; + return LHSS->getName().compare(RHSS->getName()); +} + +/// GetSortedStubs - Return the entries from a DenseMap in a deterministic +/// sorted orer. +MachineModuleInfoMachO::SymbolListTy +MachineModuleInfoMachO::GetSortedStubs(const DenseMap<const MCSymbol*, + const MCSymbol*> &Map) { + MachineModuleInfoMachO::SymbolListTy List(Map.begin(), Map.end()); + if (!List.empty()) + qsort(&List[0], List.size(), sizeof(List[0]), SortSymbolPair); + return List; +} + diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp index 544d83a..b31973e 100644 --- a/lib/CodeGen/MachineRegisterInfo.cpp +++ b/lib/CodeGen/MachineRegisterInfo.cpp @@ -110,11 +110,9 @@ void MachineRegisterInfo::replaceRegWith(unsigned FromReg, unsigned ToReg) { MachineInstr *MachineRegisterInfo::getVRegDef(unsigned Reg) const { assert(Reg-TargetRegisterInfo::FirstVirtualRegister < VRegInfo.size() && "Invalid vreg!"); - for (reg_iterator I = reg_begin(Reg), E = reg_end(); I != E; ++I) { - // Since we are in SSA form, we can stop at the first definition. - if (I.getOperand().isDef()) - return &*I; - } + // Since we are in SSA form, we can use the first definition. + if (!def_empty(Reg)) + return &*def_begin(Reg); return 0; } diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp index 0e18fa7..0f3b33f 100644 --- a/lib/CodeGen/MachineSink.cpp +++ b/lib/CodeGen/MachineSink.cpp @@ -7,7 +7,12 @@ // //===----------------------------------------------------------------------===// // -// This pass +// This pass moves instructions into successor blocks, when possible, so that +// they aren't executed on paths where their results aren't needed. 
+// +// This pass is not intended to be a replacement or a complete alternative +// for an LLVM-IR-level sinking pass. It is only designed to sink simple +// constructs that are not exposed before lowering and instruction selection. // //===----------------------------------------------------------------------===// @@ -15,12 +20,14 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineDominators.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/ADT/Statistic.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; STATISTIC(NumSunk, "Number of machine instructions sunk"); @@ -29,9 +36,12 @@ namespace { class VISIBILITY_HIDDEN MachineSinking : public MachineFunctionPass { const TargetMachine *TM; const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; MachineFunction *CurMF; // Current MachineFunction MachineRegisterInfo *RegInfo; // Machine register information - MachineDominatorTree *DT; // Machine dominator tree for the current Loop + MachineDominatorTree *DT; // Machine dominator tree + AliasAnalysis *AA; + BitVector AllocatableSet; // Which physregs are allocatable? 
public: static char ID; // Pass identification @@ -40,7 +50,9 @@ namespace { virtual bool runOnMachineFunction(MachineFunction &MF); virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); MachineFunctionPass::getAnalysisUsage(AU); + AU.addRequired<AliasAnalysis>(); AU.addRequired<MachineDominatorTree>(); AU.addPreserved<MachineDominatorTree>(); } @@ -63,10 +75,8 @@ bool MachineSinking::AllUsesDominatedByBlock(unsigned Reg, MachineBasicBlock *MBB) const { assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Only makes sense for vregs"); - for (MachineRegisterInfo::reg_iterator I = RegInfo->reg_begin(Reg), - E = RegInfo->reg_end(); I != E; ++I) { - if (I.getOperand().isDef()) continue; // ignore def. - + for (MachineRegisterInfo::use_iterator I = RegInfo->use_begin(Reg), + E = RegInfo->use_end(); I != E; ++I) { // Determine the block of the use. MachineInstr *UseInst = &*I; MachineBasicBlock *UseBlock = UseInst->getParent(); @@ -85,13 +95,16 @@ bool MachineSinking::AllUsesDominatedByBlock(unsigned Reg, bool MachineSinking::runOnMachineFunction(MachineFunction &MF) { - DOUT << "******** Machine Sinking ********\n"; + DEBUG(errs() << "******** Machine Sinking ********\n"); CurMF = &MF; TM = &CurMF->getTarget(); TII = TM->getInstrInfo(); + TRI = TM->getRegisterInfo(); RegInfo = &CurMF->getRegInfo(); DT = &getAnalysis<MachineDominatorTree>(); + AA = &getAnalysis<AliasAnalysis>(); + AllocatableSet = TRI->getAllocatableSet(*CurMF); bool EverMadeChange = false; @@ -142,7 +155,7 @@ bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) { /// instruction out of its current block into a successor. bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { // Check if it's safe to move the instruction. 
- if (!MI->isSafeToMove(TII, SawStore)) + if (!MI->isSafeToMove(TII, SawStore, AA)) return false; // FIXME: This should include support for sinking instructions within the @@ -151,7 +164,7 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { // also sink them down before their first use in the block. This xform has to // be careful not to *increase* register pressure though, e.g. sinking // "x = y + z" down if it kills y and z would increase the live ranges of y - // and z only the shrink the live range of x. + // and z and only shrink the live range of x. // Loop over all the operands of the specified instruction. If there is // anything we can't handle, bail out. @@ -169,10 +182,26 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { if (Reg == 0) continue; if (TargetRegisterInfo::isPhysicalRegister(Reg)) { - // If this is a physical register use, we can't move it. If it is a def, - // we can move it, but only if the def is dead. - if (MO.isUse() || !MO.isDead()) + if (MO.isUse()) { + // If the physreg has no defs anywhere, it's just an ambient register + // and we can freely move its uses. Alternatively, if it's allocatable, + // it could get allocated to something with a def during allocation. + if (!RegInfo->def_empty(Reg)) + return false; + if (AllocatableSet.test(Reg)) + return false; + // Check for a def among the register's aliases too. + for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { + unsigned AliasReg = *Alias; + if (!RegInfo->def_empty(AliasReg)) + return false; + if (AllocatableSet.test(AliasReg)) + return false; + } + } else if (!MO.isDead()) { + // A def that isn't dead. We can't move it. return false; + } } else { // Virtual register uses are always safe to sink. 
if (MO.isUse()) continue; @@ -232,15 +261,15 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { if (MI->getParent() == SuccToSinkTo) return false; - DEBUG(cerr << "Sink instr " << *MI); - DEBUG(cerr << "to block " << *SuccToSinkTo); + DEBUG(errs() << "Sink instr " << *MI); + DEBUG(errs() << "to block " << *SuccToSinkTo); // If the block has multiple predecessors, this would introduce computation on // a path that it doesn't already exist. We could split the critical edge, // but for now we just punt. // FIXME: Split critical edges if not backedges. if (SuccToSinkTo->pred_size() > 1) { - DEBUG(cerr << " *** PUNTING: Critical edge found\n"); + DEBUG(errs() << " *** PUNTING: Critical edge found\n"); return false; } diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp index be1396c..18a3ead 100644 --- a/lib/CodeGen/MachineVerifier.cpp +++ b/lib/CodeGen/MachineVerifier.cpp @@ -23,21 +23,23 @@ // the verifier errors. //===----------------------------------------------------------------------===// -#include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/SetOperations.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/Function.h" #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/SetOperations.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" -#include <fstream> - +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; namespace { @@ -53,6 +55,7 @@ namespace { void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); + 
MachineFunctionPass::getAnalysisUsage(AU); } bool runOnMachineFunction(MachineFunction &MF); @@ -61,7 +64,7 @@ namespace { const bool allowPhysDoubleDefs; const char *const OutFileName; - std::ostream *OS; + raw_ostream *OS; const MachineFunction *MF; const TargetMachine *TM; const TargetRegisterInfo *TRI; @@ -75,7 +78,8 @@ namespace { BitVector regsReserved; RegSet regsLive; - RegVector regsDefined, regsImpDefined, regsDead, regsKilled; + RegVector regsDefined, regsDead, regsKilled; + RegSet regsLiveInButUnused; // Add Reg and any sub-registers to RV void addRegWithSubRegs(RegVector &RV, unsigned Reg) { @@ -85,14 +89,6 @@ namespace { RV.push_back(*R); } - // Does RS contain any super-registers of Reg? - bool anySuperRegisters(const RegSet &RS, unsigned Reg) { - for (const unsigned *R = TRI->getSuperRegisters(Reg); *R; R++) - if (RS.count(*R)) - return true; - return false; - } - struct BBInfo { // Is this MBB reachable from the MF entry point? bool reachable; @@ -148,7 +144,7 @@ namespace { DenseMap<const MachineBasicBlock*, BBInfo> MBBInfoMap; bool isReserved(unsigned Reg) { - return Reg < regsReserved.size() && regsReserved[Reg]; + return Reg < regsReserved.size() && regsReserved.test(Reg); } void visitMachineFunctionBefore(); @@ -176,21 +172,24 @@ static RegisterPass<MachineVerifier> MachineVer("machineverifier", "Verify generated machine code"); static const PassInfo *const MachineVerifyID = &MachineVer; -FunctionPass * -llvm::createMachineVerifierPass(bool allowPhysDoubleDefs) -{ +FunctionPass *llvm::createMachineVerifierPass(bool allowPhysDoubleDefs) { return new MachineVerifier(allowPhysDoubleDefs); } -bool -MachineVerifier::runOnMachineFunction(MachineFunction &MF) -{ - std::ofstream OutFile; +bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { + raw_ostream *OutFile = 0; if (OutFileName) { - OutFile.open(OutFileName, std::ios::out | std::ios::app); - OS = &OutFile; + std::string ErrorInfo; + OutFile = new raw_fd_ostream(OutFileName, 
ErrorInfo, + raw_fd_ostream::F_Append); + if (!ErrorInfo.empty()) { + errs() << "Error opening '" << OutFileName << "': " << ErrorInfo << '\n'; + exit(1); + } + + OS = OutFile; } else { - OS = cerr.stream(); + OS = &errs(); } foundErrors = 0; @@ -215,51 +214,48 @@ MachineVerifier::runOnMachineFunction(MachineFunction &MF) } visitMachineFunctionAfter(); - if (OutFileName) - OutFile.close(); + if (OutFile) + delete OutFile; + else if (foundErrors) + llvm_report_error("Found "+Twine(foundErrors)+" machine code errors."); - if (foundErrors) { - cerr << "\nStopping with " << foundErrors << " machine code errors.\n"; - exit(1); - } + // Clean up. + regsLive.clear(); + regsDefined.clear(); + regsDead.clear(); + regsKilled.clear(); + regsLiveInButUnused.clear(); + MBBInfoMap.clear(); return false; // no changes } -void -MachineVerifier::report(const char *msg, const MachineFunction *MF) -{ +void MachineVerifier::report(const char *msg, const MachineFunction *MF) { assert(MF); - *OS << "\n"; + *OS << '\n'; if (!foundErrors++) - MF->print(OS); + MF->print(*OS); *OS << "*** Bad machine code: " << msg << " ***\n" - << "- function: " << MF->getFunction()->getName() << "\n"; + << "- function: " << MF->getFunction()->getNameStr() << "\n"; } -void -MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB) -{ +void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB) { assert(MBB); report(msg, MBB->getParent()); - *OS << "- basic block: " << MBB->getBasicBlock()->getName() + *OS << "- basic block: " << MBB->getBasicBlock()->getNameStr() << " " << (void*)MBB << " (#" << MBB->getNumber() << ")\n"; } -void -MachineVerifier::report(const char *msg, const MachineInstr *MI) -{ +void MachineVerifier::report(const char *msg, const MachineInstr *MI) { assert(MI); report(msg, MI->getParent()); *OS << "- instruction: "; - MI->print(OS, TM); + MI->print(*OS, TM); } -void -MachineVerifier::report(const char *msg, - const MachineOperand *MO, unsigned MONum) -{ 
+void MachineVerifier::report(const char *msg, + const MachineOperand *MO, unsigned MONum) { assert(MO); report(msg, MO->getParent()); *OS << "- operand " << MONum << ": "; @@ -267,9 +263,7 @@ MachineVerifier::report(const char *msg, *OS << "\n"; } -void -MachineVerifier::markReachable(const MachineBasicBlock *MBB) -{ +void MachineVerifier::markReachable(const MachineBasicBlock *MBB) { BBInfo &MInfo = MBBInfoMap[MBB]; if (!MInfo.reachable) { MInfo.reachable = true; @@ -279,16 +273,158 @@ MachineVerifier::markReachable(const MachineBasicBlock *MBB) } } -void -MachineVerifier::visitMachineFunctionBefore() -{ +void MachineVerifier::visitMachineFunctionBefore() { regsReserved = TRI->getReservedRegs(*MF); + + // A sub-register of a reserved register is also reserved + for (int Reg = regsReserved.find_first(); Reg>=0; + Reg = regsReserved.find_next(Reg)) { + for (const unsigned *Sub = TRI->getSubRegisters(Reg); *Sub; ++Sub) { + // FIXME: This should probably be: + // assert(regsReserved.test(*Sub) && "Non-reserved sub-register"); + regsReserved.set(*Sub); + } + } markReachable(&MF->front()); } -void -MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) -{ +void MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { + const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); + + // Start with minimal CFG sanity checks. + MachineFunction::const_iterator MBBI = MBB; + ++MBBI; + if (MBBI != MF->end()) { + // Block is not last in function. + if (!MBB->isSuccessor(MBBI)) { + // Block does not fall through. 
+ if (MBB->empty()) { + report("MBB doesn't fall through but is empty!", MBB); + } + } + if (TII->BlockHasNoFallThrough(*MBB)) { + if (MBB->empty()) { + report("TargetInstrInfo says the block has no fall through, but the " + "block is empty!", MBB); + } else if (!MBB->back().getDesc().isBarrier()) { + report("TargetInstrInfo says the block has no fall through, but the " + "block does not end in a barrier!", MBB); + } + } + } else { + // Block is last in function. + if (MBB->empty()) { + report("MBB is last in function but is empty!", MBB); + } + } + + // Call AnalyzeBranch. If it succeeds, there several more conditions to check. + MachineBasicBlock *TBB = 0, *FBB = 0; + SmallVector<MachineOperand, 4> Cond; + if (!TII->AnalyzeBranch(*const_cast<MachineBasicBlock *>(MBB), + TBB, FBB, Cond)) { + // Ok, AnalyzeBranch thinks it knows what's going on with this block. Let's + // check whether its answers match up with reality. + if (!TBB && !FBB) { + // Block falls through to its successor. + MachineFunction::const_iterator MBBI = MBB; + ++MBBI; + if (MBBI == MF->end()) { + // It's possible that the block legitimately ends with a noreturn + // call or an unreachable, in which case it won't actually fall + // out the bottom of the function. + } else if (MBB->succ_empty()) { + // It's possible that the block legitimately ends with a noreturn + // call or an unreachable, in which case it won't actuall fall + // out of the block. 
+ } else if (MBB->succ_size() != 1) { + report("MBB exits via unconditional fall-through but doesn't have " + "exactly one CFG successor!", MBB); + } else if (MBB->succ_begin()[0] != MBBI) { + report("MBB exits via unconditional fall-through but its successor " + "differs from its CFG successor!", MBB); + } + if (!MBB->empty() && MBB->back().getDesc().isBarrier()) { + report("MBB exits via unconditional fall-through but ends with a " + "barrier instruction!", MBB); + } + if (!Cond.empty()) { + report("MBB exits via unconditional fall-through but has a condition!", + MBB); + } + } else if (TBB && !FBB && Cond.empty()) { + // Block unconditionally branches somewhere. + if (MBB->succ_size() != 1) { + report("MBB exits via unconditional branch but doesn't have " + "exactly one CFG successor!", MBB); + } else if (MBB->succ_begin()[0] != TBB) { + report("MBB exits via unconditional branch but the CFG " + "successor doesn't match the actual successor!", MBB); + } + if (MBB->empty()) { + report("MBB exits via unconditional branch but doesn't contain " + "any instructions!", MBB); + } else if (!MBB->back().getDesc().isBarrier()) { + report("MBB exits via unconditional branch but doesn't end with a " + "barrier instruction!", MBB); + } else if (!MBB->back().getDesc().isTerminator()) { + report("MBB exits via unconditional branch but the branch isn't a " + "terminator instruction!", MBB); + } + } else if (TBB && !FBB && !Cond.empty()) { + // Block conditionally branches somewhere, otherwise falls through. 
+ MachineFunction::const_iterator MBBI = MBB; + ++MBBI; + if (MBBI == MF->end()) { + report("MBB conditionally falls through out of function!", MBB); + } if (MBB->succ_size() != 2) { + report("MBB exits via conditional branch/fall-through but doesn't have " + "exactly two CFG successors!", MBB); + } else if ((MBB->succ_begin()[0] == TBB && MBB->succ_end()[1] == MBBI) || + (MBB->succ_begin()[1] == TBB && MBB->succ_end()[0] == MBBI)) { + report("MBB exits via conditional branch/fall-through but the CFG " + "successors don't match the actual successors!", MBB); + } + if (MBB->empty()) { + report("MBB exits via conditional branch/fall-through but doesn't " + "contain any instructions!", MBB); + } else if (MBB->back().getDesc().isBarrier()) { + report("MBB exits via conditional branch/fall-through but ends with a " + "barrier instruction!", MBB); + } else if (!MBB->back().getDesc().isTerminator()) { + report("MBB exits via conditional branch/fall-through but the branch " + "isn't a terminator instruction!", MBB); + } + } else if (TBB && FBB) { + // Block conditionally branches somewhere, otherwise branches + // somewhere else. 
+ if (MBB->succ_size() != 2) { + report("MBB exits via conditional branch/branch but doesn't have " + "exactly two CFG successors!", MBB); + } else if ((MBB->succ_begin()[0] == TBB && MBB->succ_end()[1] == FBB) || + (MBB->succ_begin()[1] == TBB && MBB->succ_end()[0] == FBB)) { + report("MBB exits via conditional branch/branch but the CFG " + "successors don't match the actual successors!", MBB); + } + if (MBB->empty()) { + report("MBB exits via conditional branch/branch but doesn't " + "contain any instructions!", MBB); + } else if (!MBB->back().getDesc().isBarrier()) { + report("MBB exits via conditional branch/branch but doesn't end with a " + "barrier instruction!", MBB); + } else if (!MBB->back().getDesc().isTerminator()) { + report("MBB exits via conditional branch/branch but the branch " + "isn't a terminator instruction!", MBB); + } + if (Cond.empty()) { + report("MBB exits via conditinal branch/branch but there's no " + "condition!", MBB); + } + } else { + report("AnalyzeBranch returned invalid data!", MBB); + } + } + regsLive.clear(); for (MachineBasicBlock::const_livein_iterator I = MBB->livein_begin(), E = MBB->livein_end(); I != E; ++I) { @@ -300,32 +436,41 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) for (const unsigned *R = TRI->getSubRegisters(*I); *R; R++) regsLive.insert(*R); } + regsLiveInButUnused = regsLive; + + const MachineFrameInfo *MFI = MF->getFrameInfo(); + assert(MFI && "Function has no frame info"); + BitVector PR = MFI->getPristineRegs(MBB); + for (int I = PR.find_first(); I>0; I = PR.find_next(I)) { + regsLive.insert(I); + for (const unsigned *R = TRI->getSubRegisters(I); *R; R++) + regsLive.insert(*R); + } + regsKilled.clear(); regsDefined.clear(); - regsImpDefined.clear(); } -void -MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) -{ +void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) { const TargetInstrDesc &TI = MI->getDesc(); - if (MI->getNumExplicitOperands() < 
TI.getNumOperands()) { + if (MI->getNumOperands() < TI.getNumOperands()) { report("Too few operands", MI); *OS << TI.getNumOperands() << " operands expected, but " << MI->getNumExplicitOperands() << " given.\n"; } - if (!TI.isVariadic()) { - if (MI->getNumExplicitOperands() > TI.getNumOperands()) { - report("Too many operands", MI); - *OS << TI.getNumOperands() << " operands expected, but " - << MI->getNumExplicitOperands() << " given.\n"; - } + + // Check the MachineMemOperands for basic consistency. + for (MachineInstr::mmo_iterator I = MI->memoperands_begin(), + E = MI->memoperands_end(); I != E; ++I) { + if ((*I)->isLoad() && !TI.mayLoad()) + report("Missing mayLoad flag", MI); + if ((*I)->isStore() && !TI.mayStore()) + report("Missing mayStore flag", MI); } } void -MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) -{ +MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { const MachineInstr *MI = MO->getParent(); const TargetInstrDesc &TI = MI->getDesc(); @@ -337,6 +482,16 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) report("Explicit definition marked as use", MO, MONum); else if (MO->isImplicit()) report("Explicit definition marked as implicit", MO, MONum); + } else if (MONum < TI.getNumOperands()) { + if (MO->isReg()) { + if (MO->isDef()) + report("Explicit operand marked as def", MO, MONum); + if (MO->isImplicit()) + report("Explicit operand marked as implicit", MO, MONum); + } + } else { + if (MO->isReg() && !MO->isImplicit() && !TI.isVariadic()) + report("Extra explicit operand on non-variadic instruction", MO, MONum); } switch (MO->getType()) { @@ -346,18 +501,26 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) return; // Check Live Variables. - if (MO->isUse()) { + if (MO->isUndef()) { + // An <undef> doesn't refer to any register, so just skip it. 
+ } else if (MO->isUse()) { + regsLiveInButUnused.erase(Reg); + if (MO->isKill()) { addRegWithSubRegs(regsKilled, Reg); + // Tied operands on two-address instuctions MUST NOT have a <kill> flag. + if (MI->isRegTiedToDefOperand(MONum)) + report("Illegal kill flag on two-address instruction operand", + MO, MONum); } else { - // TwoAddress instr modyfying a reg is treated as kill+def. + // TwoAddress instr modifying a reg is treated as kill+def. unsigned defIdx; if (MI->isRegTiedToDefOperand(MONum, &defIdx) && MI->getOperand(defIdx).getReg() == Reg) addRegWithSubRegs(regsKilled, Reg); } - // Explicit use of a dead register. - if (!MO->isImplicit() && !regsLive.count(Reg)) { + // Use of a dead register. + if (!regsLive.count(Reg)) { if (TargetRegisterInfo::isPhysicalRegister(Reg)) { // Reserved registers may be used even when 'dead'. if (!isReserved(Reg)) @@ -374,15 +537,13 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) } } } else { + assert(MO->isDef()); // Register defined. // TODO: verify that earlyclobber ops are not used. - if (MO->isImplicit()) - addRegWithSubRegs(regsImpDefined, Reg); - else - addRegWithSubRegs(regsDefined, Reg); - if (MO->isDead()) addRegWithSubRegs(regsDead, Reg); + else + addRegWithSubRegs(regsDefined, Reg); } // Check register classes. 
@@ -401,8 +562,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) } sr = s; } - if (TOI.RegClass) { - const TargetRegisterClass *DRC = TRI->getRegClass(TOI.RegClass); + if (const TargetRegisterClass *DRC = TOI.getRegClass(TRI)) { if (!DRC->contains(sr)) { report("Illegal physical register for instruction", MO, MONum); *OS << TRI->getName(sr) << " is not a " @@ -419,8 +579,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) } RC = *(RC->subregclasses_begin()+SubIdx); } - if (TOI.RegClass) { - const TargetRegisterClass *DRC = TRI->getRegClass(TOI.RegClass); + if (const TargetRegisterClass *DRC = TOI.getRegClass(TRI)) { if (RC != DRC && !RC->hasSuperClass(DRC)) { report("Illegal virtual register for instruction", MO, MONum); *OS << "Expected a " << DRC->getName() << " register, but got a " @@ -431,34 +590,35 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) } break; } - // Can PHI instrs refer to MBBs not in the CFG? X86 and ARM do. - // case MachineOperand::MO_MachineBasicBlock: - // if (MI->getOpcode() == TargetInstrInfo::PHI) { - // if (!MO->getMBB()->isSuccessor(MI->getParent())) - // report("PHI operand is not in the CFG", MO, MONum); - // } - // break; + + case MachineOperand::MO_MachineBasicBlock: + if (MI->getOpcode() == TargetInstrInfo::PHI) { + if (!MO->getMBB()->isSuccessor(MI->getParent())) + report("PHI operand is not in the CFG", MO, MONum); + } + break; + default: break; } } -void -MachineVerifier::visitMachineInstrAfter(const MachineInstr *MI) -{ +void MachineVerifier::visitMachineInstrAfter(const MachineInstr *MI) { BBInfo &MInfo = MBBInfoMap[MI->getParent()]; set_union(MInfo.regsKilled, regsKilled); set_subtract(regsLive, regsKilled); regsKilled.clear(); - for (RegVector::const_iterator I = regsDefined.begin(), - E = regsDefined.end(); I != E; ++I) { + // Verify that both <def> and <def,dead> operands refer to dead registers. 
+ RegVector defs(regsDefined); + defs.append(regsDead.begin(), regsDead.end()); + + for (RegVector::const_iterator I = defs.begin(), E = defs.end(); + I != E; ++I) { if (regsLive.count(*I)) { if (TargetRegisterInfo::isPhysicalRegister(*I)) { - // We allow double defines to physical registers with live - // super-registers. if (!allowPhysDoubleDefs && !isReserved(*I) && - !anySuperRegisters(regsLive, *I)) { + !regsLiveInButUnused.count(*I)) { report("Redefining a live physical register", MI); *OS << "Register " << TRI->getName(*I) << " was defined but already live.\n"; @@ -478,14 +638,12 @@ MachineVerifier::visitMachineInstrAfter(const MachineInstr *MI) } } - set_union(regsLive, regsDefined); regsDefined.clear(); - set_union(regsLive, regsImpDefined); regsImpDefined.clear(); set_subtract(regsLive, regsDead); regsDead.clear(); + set_union(regsLive, regsDefined); regsDefined.clear(); } void -MachineVerifier::visitMachineBasicBlockAfter(const MachineBasicBlock *MBB) -{ +MachineVerifier::visitMachineBasicBlockAfter(const MachineBasicBlock *MBB) { MBBInfoMap[MBB].regsLiveOut = regsLive; regsLive.clear(); } @@ -493,9 +651,7 @@ MachineVerifier::visitMachineBasicBlockAfter(const MachineBasicBlock *MBB) // Calculate the largest possible vregsPassed sets. These are the registers that // can pass through an MBB live, but may not be live every time. It is assumed // that all vregsPassed sets are empty before the call. -void -MachineVerifier::calcMaxRegsPassed() -{ +void MachineVerifier::calcMaxRegsPassed() { // First push live-out regs to successors' vregsPassed. Remember the MBBs that // have any vregsPassed. DenseSet<const MachineBasicBlock*> todo; @@ -533,9 +689,7 @@ MachineVerifier::calcMaxRegsPassed() // Calculate the minimum vregsPassed set. These are the registers that always // pass live through an MBB. The calculation assumes that calcMaxRegsPassed has // been called earlier. 
-void -MachineVerifier::calcMinRegsPassed() -{ +void MachineVerifier::calcMinRegsPassed() { DenseSet<const MachineBasicBlock*> todo; for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end(); MFI != MFE; ++MFI) @@ -570,9 +724,7 @@ MachineVerifier::calcMinRegsPassed() // Check PHI instructions at the beginning of MBB. It is assumed that // calcMinRegsPassed has been run so BBInfo::isLiveOut is valid. -void -MachineVerifier::checkPHIOps(const MachineBasicBlock *MBB) -{ +void MachineVerifier::checkPHIOps(const MachineBasicBlock *MBB) { for (MachineBasicBlock::const_iterator BBI = MBB->begin(), BBE = MBB->end(); BBI != BBE && BBI->getOpcode() == TargetInstrInfo::PHI; ++BBI) { DenseSet<const MachineBasicBlock*> seen; @@ -601,9 +753,7 @@ MachineVerifier::checkPHIOps(const MachineBasicBlock *MBB) } } -void -MachineVerifier::visitMachineFunctionAfter() -{ +void MachineVerifier::visitMachineFunctionAfter() { calcMaxRegsPassed(); // With the maximal set of vregsPassed we can verify dead-in registers. diff --git a/lib/CodeGen/ObjectCodeEmitter.cpp b/lib/CodeGen/ObjectCodeEmitter.cpp new file mode 100644 index 0000000..cf05275 --- /dev/null +++ b/lib/CodeGen/ObjectCodeEmitter.cpp @@ -0,0 +1,141 @@ +//===-- llvm/CodeGen/ObjectCodeEmitter.cpp -------------------- -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/BinaryObject.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineRelocation.h" +#include "llvm/CodeGen/ObjectCodeEmitter.h" + +//===----------------------------------------------------------------------===// +// ObjectCodeEmitter Implementation +//===----------------------------------------------------------------------===// + +namespace llvm { + +ObjectCodeEmitter::ObjectCodeEmitter() : BO(0) {} +ObjectCodeEmitter::ObjectCodeEmitter(BinaryObject *bo) : BO(bo) {} +ObjectCodeEmitter::~ObjectCodeEmitter() {} + +/// setBinaryObject - set the BinaryObject we are writting to +void ObjectCodeEmitter::setBinaryObject(BinaryObject *bo) { BO = bo; } + +/// emitByte - This callback is invoked when a byte needs to be +/// written to the data stream, without buffer overflow testing. +void ObjectCodeEmitter::emitByte(uint8_t B) { + BO->emitByte(B); +} + +/// emitWordLE - This callback is invoked when a 32-bit word needs to be +/// written to the data stream in little-endian format. +void ObjectCodeEmitter::emitWordLE(uint32_t W) { + BO->emitWordLE(W); +} + +/// emitWordBE - This callback is invoked when a 32-bit word needs to be +/// written to the data stream in big-endian format. +void ObjectCodeEmitter::emitWordBE(uint32_t W) { + BO->emitWordBE(W); +} + +/// emitDWordLE - This callback is invoked when a 64-bit word needs to be +/// written to the data stream in little-endian format. +void ObjectCodeEmitter::emitDWordLE(uint64_t W) { + BO->emitDWordLE(W); +} + +/// emitDWordBE - This callback is invoked when a 64-bit word needs to be +/// written to the data stream in big-endian format. +void ObjectCodeEmitter::emitDWordBE(uint64_t W) { + BO->emitDWordBE(W); +} + +/// emitAlignment - Align 'BO' to the necessary alignment boundary. 
+void ObjectCodeEmitter::emitAlignment(unsigned Alignment /* 0 */, + uint8_t fill /* 0 */) { + BO->emitAlignment(Alignment, fill); +} + +/// emitULEB128Bytes - This callback is invoked when a ULEB128 needs to be +/// written to the data stream. +void ObjectCodeEmitter::emitULEB128Bytes(uint64_t Value) { + BO->emitULEB128Bytes(Value); +} + +/// emitSLEB128Bytes - This callback is invoked when a SLEB128 needs to be +/// written to the data stream. +void ObjectCodeEmitter::emitSLEB128Bytes(uint64_t Value) { + BO->emitSLEB128Bytes(Value); +} + +/// emitString - This callback is invoked when a String needs to be +/// written to the data stream. +void ObjectCodeEmitter::emitString(const std::string &String) { + BO->emitString(String); +} + +/// getCurrentPCValue - This returns the address that the next emitted byte +/// will be output to. +uintptr_t ObjectCodeEmitter::getCurrentPCValue() const { + return BO->getCurrentPCOffset(); +} + +/// getCurrentPCOffset - Return the offset from the start of the emitted +/// buffer that we are currently writing to. +uintptr_t ObjectCodeEmitter::getCurrentPCOffset() const { + return BO->getCurrentPCOffset(); +} + +/// addRelocation - Whenever a relocatable address is needed, it should be +/// noted with this interface. +void ObjectCodeEmitter::addRelocation(const MachineRelocation& relocation) { + BO->addRelocation(relocation); +} + +/// StartMachineBasicBlock - This should be called by the target when a new +/// basic block is about to be emitted. This way the MCE knows where the +/// start of the block is, and can implement getMachineBasicBlockAddress. 
+void ObjectCodeEmitter::StartMachineBasicBlock(MachineBasicBlock *MBB) { + if (MBBLocations.size() <= (unsigned)MBB->getNumber()) + MBBLocations.resize((MBB->getNumber()+1)*2); + MBBLocations[MBB->getNumber()] = getCurrentPCOffset(); +} + +/// getMachineBasicBlockAddress - Return the address of the specified +/// MachineBasicBlock, only usable after the label for the MBB has been +/// emitted. +uintptr_t +ObjectCodeEmitter::getMachineBasicBlockAddress(MachineBasicBlock *MBB) const { + assert(MBBLocations.size() > (unsigned)MBB->getNumber() && + MBBLocations[MBB->getNumber()] && "MBB not emitted!"); + return MBBLocations[MBB->getNumber()]; +} + +/// getJumpTableEntryAddress - Return the address of the jump table with index +/// 'Index' in the function that last called initJumpTableInfo. +uintptr_t ObjectCodeEmitter::getJumpTableEntryAddress(unsigned Index) const { + assert(JTLocations.size() > Index && "JT not emitted!"); + return JTLocations[Index]; +} + +/// getConstantPoolEntryAddress - Return the address of the 'Index' entry in +/// the constant pool that was last emitted with the emitConstantPool method. +uintptr_t ObjectCodeEmitter::getConstantPoolEntryAddress(unsigned Index) const { + assert(CPLocations.size() > Index && "CP not emitted!"); + return CPLocations[Index]; +} + +/// getConstantPoolEntrySection - Return the section of the 'Index' entry in +/// the constant pool that was last emitted with the emitConstantPool method. 
+uintptr_t ObjectCodeEmitter::getConstantPoolEntrySection(unsigned Index) const { + assert(CPSections.size() > Index && "CP not emitted!"); + return CPSections[Index]; +} + +} // end namespace llvm + diff --git a/lib/CodeGen/PBQP/AnnotatedGraph.h b/lib/CodeGen/PBQP/AnnotatedGraph.h new file mode 100644 index 0000000..904061c --- /dev/null +++ b/lib/CodeGen/PBQP/AnnotatedGraph.h @@ -0,0 +1,184 @@ +//===-- AnnotatedGraph.h - Annotated PBQP Graph ----------------*- C++ --*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Annotated PBQP Graph class. This class is used internally by the PBQP solver +// to cache information to speed up reduction. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_PBQP_ANNOTATEDGRAPH_H +#define LLVM_CODEGEN_PBQP_ANNOTATEDGRAPH_H + +#include "GraphBase.h" + +namespace PBQP { + + +template <typename NodeData, typename EdgeData> class AnnotatedEdge; + +template <typename NodeData, typename EdgeData> +class AnnotatedNode : public NodeBase<AnnotatedNode<NodeData, EdgeData>, + AnnotatedEdge<NodeData, EdgeData> > { +private: + + NodeData nodeData; + +public: + + AnnotatedNode(const Vector &costs, const NodeData &nodeData) : + NodeBase<AnnotatedNode<NodeData, EdgeData>, + AnnotatedEdge<NodeData, EdgeData> >(costs), + nodeData(nodeData) {} + + NodeData& getNodeData() { return nodeData; } + const NodeData& getNodeData() const { return nodeData; } + +}; + +template <typename NodeData, typename EdgeData> +class AnnotatedEdge : public EdgeBase<AnnotatedNode<NodeData, EdgeData>, + AnnotatedEdge<NodeData, EdgeData> > { +private: + + typedef typename GraphBase<AnnotatedNode<NodeData, EdgeData>, + AnnotatedEdge<NodeData, EdgeData> >::NodeIterator + NodeIterator; + + EdgeData edgeData; + 
+public: + + + AnnotatedEdge(const NodeIterator &node1Itr, const NodeIterator &node2Itr, + const Matrix &costs, const EdgeData &edgeData) : + EdgeBase<AnnotatedNode<NodeData, EdgeData>, + AnnotatedEdge<NodeData, EdgeData> >(node1Itr, node2Itr, costs), + edgeData(edgeData) {} + + EdgeData& getEdgeData() { return edgeData; } + const EdgeData& getEdgeData() const { return edgeData; } + +}; + +template <typename NodeData, typename EdgeData> +class AnnotatedGraph : public GraphBase<AnnotatedNode<NodeData, EdgeData>, + AnnotatedEdge<NodeData, EdgeData> > { +private: + + typedef GraphBase<AnnotatedNode<NodeData, EdgeData>, + AnnotatedEdge<NodeData, EdgeData> > PGraph; + + typedef AnnotatedNode<NodeData, EdgeData> NodeEntry; + typedef AnnotatedEdge<NodeData, EdgeData> EdgeEntry; + + + void copyFrom(const AnnotatedGraph &other) { + if (!other.areNodeIDsValid()) { + other.assignNodeIDs(); + } + std::vector<NodeIterator> newNodeItrs(other.getNumNodes()); + + for (ConstNodeIterator nItr = other.nodesBegin(), nEnd = other.nodesEnd(); + nItr != nEnd; ++nItr) { + newNodeItrs[other.getNodeID(nItr)] = addNode(other.getNodeCosts(nItr)); + } + + for (ConstEdgeIterator eItr = other.edgesBegin(), eEnd = other.edgesEnd(); + eItr != eEnd; ++eItr) { + + unsigned node1ID = other.getNodeID(other.getEdgeNode1(eItr)), + node2ID = other.getNodeID(other.getEdgeNode2(eItr)); + + addEdge(newNodeItrs[node1ID], newNodeItrs[node2ID], + other.getEdgeCosts(eItr), other.getEdgeData(eItr)); + } + + } + +public: + + typedef typename PGraph::NodeIterator NodeIterator; + typedef typename PGraph::ConstNodeIterator ConstNodeIterator; + typedef typename PGraph::EdgeIterator EdgeIterator; + typedef typename PGraph::ConstEdgeIterator ConstEdgeIterator; + + AnnotatedGraph() {} + + AnnotatedGraph(const AnnotatedGraph &other) { + copyFrom(other); + } + + AnnotatedGraph& operator=(const AnnotatedGraph &other) { + PGraph::clear(); + copyFrom(other); + return *this; + } + + NodeIterator addNode(const Vector &costs, 
const NodeData &data) { + return PGraph::addConstructedNode(NodeEntry(costs, data)); + } + + EdgeIterator addEdge(const NodeIterator &node1Itr, + const NodeIterator &node2Itr, + const Matrix &costs, const EdgeData &data) { + return PGraph::addConstructedEdge(EdgeEntry(node1Itr, node2Itr, + costs, data)); + } + + NodeData& getNodeData(const NodeIterator &nodeItr) { + return getNodeEntry(nodeItr).getNodeData(); + } + + const NodeData& getNodeData(const NodeIterator &nodeItr) const { + return getNodeEntry(nodeItr).getNodeData(); + } + + EdgeData& getEdgeData(const EdgeIterator &edgeItr) { + return getEdgeEntry(edgeItr).getEdgeData(); + } + + const EdgeEntry& getEdgeData(const EdgeIterator &edgeItr) const { + return getEdgeEntry(edgeItr).getEdgeData(); + } + + SimpleGraph toSimpleGraph() const { + SimpleGraph g; + + if (!PGraph::areNodeIDsValid()) { + PGraph::assignNodeIDs(); + } + std::vector<SimpleGraph::NodeIterator> newNodeItrs(PGraph::getNumNodes()); + + for (ConstNodeIterator nItr = PGraph::nodesBegin(), + nEnd = PGraph::nodesEnd(); + nItr != nEnd; ++nItr) { + + newNodeItrs[getNodeID(nItr)] = g.addNode(getNodeCosts(nItr)); + } + + for (ConstEdgeIterator + eItr = PGraph::edgesBegin(), eEnd = PGraph::edgesEnd(); + eItr != eEnd; ++eItr) { + + unsigned node1ID = getNodeID(getEdgeNode1(eItr)), + node2ID = getNodeID(getEdgeNode2(eItr)); + + g.addEdge(newNodeItrs[node1ID], newNodeItrs[node2ID], + getEdgeCosts(eItr)); + } + + return g; + } + +}; + + +} + +#endif // LLVM_CODEGEN_PBQP_ANNOTATEDGRAPH_H diff --git a/lib/CodeGen/PBQP/ExhaustiveSolver.h b/lib/CodeGen/PBQP/ExhaustiveSolver.h new file mode 100644 index 0000000..b2f2e6f --- /dev/null +++ b/lib/CodeGen/PBQP/ExhaustiveSolver.h @@ -0,0 +1,110 @@ +//===-- ExhaustiveSolver.h - Brute Force PBQP Solver -----------*- C++ --*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// Uses a trivial brute force algorithm to solve a PBQP problem. +// PBQP is NP-HARD - This solver should only be used for debugging small +// problems. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_PBQP_EXHAUSTIVESOLVER_H +#define LLVM_CODEGEN_PBQP_EXHAUSTIVESOLVER_H + +#include "Solver.h" + +namespace PBQP { + +/// A brute force PBQP solver. This solver takes exponential time. It should +/// only be used for debugging purposes. +class ExhaustiveSolverImpl { +private: + + const SimpleGraph &g; + + PBQPNum getSolutionCost(const Solution &solution) const { + PBQPNum cost = 0.0; + + for (SimpleGraph::ConstNodeIterator + nodeItr = g.nodesBegin(), nodeEnd = g.nodesEnd(); + nodeItr != nodeEnd; ++nodeItr) { + + unsigned nodeId = g.getNodeID(nodeItr); + + cost += g.getNodeCosts(nodeItr)[solution.getSelection(nodeId)]; + } + + for (SimpleGraph::ConstEdgeIterator + edgeItr = g.edgesBegin(), edgeEnd = g.edgesEnd(); + edgeItr != edgeEnd; ++edgeItr) { + + SimpleGraph::ConstNodeIterator n1 = g.getEdgeNode1Itr(edgeItr), + n2 = g.getEdgeNode2Itr(edgeItr); + unsigned sol1 = solution.getSelection(g.getNodeID(n1)), + sol2 = solution.getSelection(g.getNodeID(n2)); + + cost += g.getEdgeCosts(edgeItr)[sol1][sol2]; + } + + return cost; + } + +public: + + ExhaustiveSolverImpl(const SimpleGraph &g) : g(g) {} + + Solution solve() const { + Solution current(g.getNumNodes(), true), optimal(current); + + PBQPNum bestCost = std::numeric_limits<PBQPNum>::infinity(); + bool finished = false; + + while (!finished) { + PBQPNum currentCost = getSolutionCost(current); + + if (currentCost < bestCost) { + optimal = current; + bestCost = currentCost; + } + + // assume we're done. 
+ finished = true; + + for (unsigned i = 0; i < g.getNumNodes(); ++i) { + if (current.getSelection(i) == + (g.getNodeCosts(g.getNodeItr(i)).getLength() - 1)) { + current.setSelection(i, 0); + } + else { + current.setSelection(i, current.getSelection(i) + 1); + finished = false; + break; + } + } + + } + + optimal.setSolutionCost(bestCost); + + return optimal; + } + +}; + +class ExhaustiveSolver : public Solver { +public: + ~ExhaustiveSolver() {} + Solution solve(const SimpleGraph &g) const { + ExhaustiveSolverImpl solver(g); + return solver.solve(); + } +}; + +} + +#endif // LLVM_CODGEN_PBQP_EXHAUSTIVESOLVER_HPP diff --git a/lib/CodeGen/PBQP/GraphBase.h b/lib/CodeGen/PBQP/GraphBase.h new file mode 100644 index 0000000..cc3e017 --- /dev/null +++ b/lib/CodeGen/PBQP/GraphBase.h @@ -0,0 +1,582 @@ +//===-- GraphBase.h - Abstract Base PBQP Graph -----------------*- C++ --*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Base class for PBQP Graphs. +// +//===----------------------------------------------------------------------===// + + +#ifndef LLVM_CODEGEN_PBQP_GRAPHBASE_H +#define LLVM_CODEGEN_PBQP_GRAPHBASE_H + +#include "PBQPMath.h" + +#include <list> +#include <vector> + +namespace PBQP { + +// UGLY, but I'm not sure there's a good way around this: We need to be able to +// look up a Node's "adjacent edge list" structure type before the Node type is +// fully constructed. We can enable this by pushing the choice of data type +// out into this traits class. +template <typename Graph> +class NodeBaseTraits { + public: + typedef std::list<typename Graph::EdgeIterator> AdjEdgeList; + typedef typename AdjEdgeList::iterator AdjEdgeIterator; + typedef typename AdjEdgeList::const_iterator ConstAdjEdgeIterator; +}; + +/// \brief Base for concrete graph classes. 
Provides a basic set of graph +/// operations which are useful for PBQP solvers. +template <typename NodeEntry, typename EdgeEntry> +class GraphBase { +private: + + typedef GraphBase<NodeEntry, EdgeEntry> ThisGraphT; + + typedef std::list<NodeEntry> NodeList; + typedef std::list<EdgeEntry> EdgeList; + + NodeList nodeList; + unsigned nodeListSize; + + EdgeList edgeList; + unsigned edgeListSize; + + GraphBase(const ThisGraphT &other) { abort(); } + void operator=(const ThisGraphT &other) { abort(); } + +public: + + /// \brief Iterates over the nodes of a graph. + typedef typename NodeList::iterator NodeIterator; + /// \brief Iterates over the nodes of a const graph. + typedef typename NodeList::const_iterator ConstNodeIterator; + /// \brief Iterates over the edges of a graph. + typedef typename EdgeList::iterator EdgeIterator; + /// \brief Iterates over the edges of a const graph. + typedef typename EdgeList::const_iterator ConstEdgeIterator; + + /// \brief Iterates over the edges attached to a node. + typedef typename NodeBaseTraits<ThisGraphT>::AdjEdgeIterator + AdjEdgeIterator; + + /// \brief Iterates over the edges attached to a node in a const graph. 
+ typedef typename NodeBaseTraits<ThisGraphT>::ConstAdjEdgeIterator + ConstAdjEdgeIterator; + +private: + + typedef std::vector<NodeIterator> IDToNodeMap; + + IDToNodeMap idToNodeMap; + bool nodeIDsValid; + + void invalidateNodeIDs() { + if (nodeIDsValid) { + idToNodeMap.clear(); + nodeIDsValid = false; + } + } + + template <typename ItrT> + bool iteratorInRange(ItrT itr, const ItrT &begin, const ItrT &end) { + for (ItrT t = begin; t != end; ++t) { + if (itr == t) + return true; + } + + return false; + } + +protected: + + GraphBase() : nodeListSize(0), edgeListSize(0), nodeIDsValid(false) {} + + NodeEntry& getNodeEntry(const NodeIterator &nodeItr) { return *nodeItr; } + const NodeEntry& getNodeEntry(const ConstNodeIterator &nodeItr) const { + return *nodeItr; + } + + EdgeEntry& getEdgeEntry(const EdgeIterator &edgeItr) { return *edgeItr; } + const EdgeEntry& getEdgeEntry(const ConstEdgeIterator &edgeItr) const { + return *edgeItr; + } + + NodeIterator addConstructedNode(const NodeEntry &nodeEntry) { + ++nodeListSize; + + invalidateNodeIDs(); + + NodeIterator newNodeItr = nodeList.insert(nodeList.end(), nodeEntry); + + return newNodeItr; + } + + EdgeIterator addConstructedEdge(const EdgeEntry &edgeEntry) { + + assert((findEdge(edgeEntry.getNode1Itr(), edgeEntry.getNode2Itr()) + == edgeList.end()) && "Attempt to add duplicate edge."); + + ++edgeListSize; + + // Add the edge to the graph. + EdgeIterator edgeItr = edgeList.insert(edgeList.end(), edgeEntry); + + // Get a reference to the version in the graph. + EdgeEntry &newEdgeEntry = getEdgeEntry(edgeItr); + + // Node entries: + NodeEntry &node1Entry = getNodeEntry(newEdgeEntry.getNode1Itr()), + &node2Entry = getNodeEntry(newEdgeEntry.getNode2Itr()); + + // Sanity check on matrix dimensions. 
+ assert((node1Entry.getCosts().getLength() == + newEdgeEntry.getCosts().getRows()) && + (node2Entry.getCosts().getLength() == + newEdgeEntry.getCosts().getCols()) && + "Matrix dimensions do not match cost vector dimensions."); + + // Create links between nodes and edges. + newEdgeEntry.setNode1ThisEdgeItr( + node1Entry.addAdjEdge(edgeItr)); + newEdgeEntry.setNode2ThisEdgeItr( + node2Entry.addAdjEdge(edgeItr)); + + return edgeItr; + } + +public: + + /// \brief Returns the number of nodes in this graph. + unsigned getNumNodes() const { return nodeListSize; } + + /// \brief Returns the number of edges in this graph. + unsigned getNumEdges() const { return edgeListSize; } + + /// \brief Return the cost vector for the given node. + Vector& getNodeCosts(const NodeIterator &nodeItr) { + return getNodeEntry(nodeItr).getCosts(); + } + + /// \brief Return the cost vector for the give node. + const Vector& getNodeCosts(const ConstNodeIterator &nodeItr) const { + return getNodeEntry(nodeItr).getCosts(); + } + + /// \brief Return the degree of the given node. + unsigned getNodeDegree(const NodeIterator &nodeItr) const { + return getNodeEntry(nodeItr).getDegree(); + } + + /// \brief Assigns sequential IDs to the nodes, starting at 0, which + /// remain valid until the next addition or removal of a node. + void assignNodeIDs() { + unsigned curID = 0; + idToNodeMap.resize(getNumNodes()); + for (NodeIterator nodeItr = nodesBegin(), nodeEnd = nodesEnd(); + nodeItr != nodeEnd; ++nodeItr, ++curID) { + getNodeEntry(nodeItr).setID(curID); + idToNodeMap[curID] = nodeItr; + } + nodeIDsValid = true; + } + + /// \brief Assigns sequential IDs to the nodes using the ordering of the + /// given vector. 
+ void assignNodeIDs(const std::vector<NodeIterator> &nodeOrdering) { + assert((getNumNodes() == nodeOrdering.size()) && + "Wrong number of nodes in node ordering."); + idToNodeMap = nodeOrdering; + for (unsigned nodeID = 0; nodeID < idToNodeMap.size(); ++nodeID) { + getNodeEntry(idToNodeMap[nodeID]).setID(nodeID); + } + nodeIDsValid = true; + } + + /// \brief Returns true if valid node IDs are assigned, false otherwise. + bool areNodeIDsValid() const { return nodeIDsValid; } + + /// \brief Return the numeric ID of the given node. + /// + /// Calls to this method will result in an assertion failure if there have + /// been any node additions or removals since the last call to + /// assignNodeIDs(). + unsigned getNodeID(const ConstNodeIterator &nodeItr) const { + assert(nodeIDsValid && "Attempt to retrieve invalid ID."); + return getNodeEntry(nodeItr).getID(); + } + + /// \brief Returns the iterator associated with the given node ID. + NodeIterator getNodeItr(unsigned nodeID) { + assert(nodeIDsValid && "Attempt to retrieve iterator with invalid ID."); + return idToNodeMap[nodeID]; + } + + /// \brief Returns the iterator associated with the given node ID. + ConstNodeIterator getNodeItr(unsigned nodeID) const { + assert(nodeIDsValid && "Attempt to retrieve iterator with invalid ID."); + return idToNodeMap[nodeID]; + } + + /// \brief Removes the given node (and all attached edges) from the graph. + void removeNode(const NodeIterator &nodeItr) { + assert(iteratorInRange(nodeItr, nodeList.begin(), nodeList.end()) && + "Iterator does not belong to this graph!"); + + invalidateNodeIDs(); + + NodeEntry &nodeEntry = getNodeEntry(nodeItr); + + // We need to copy this out because it will be destroyed as the edges are + // removed. 
+ typedef std::vector<EdgeIterator> AdjEdgeList; + typedef typename AdjEdgeList::iterator AdjEdgeListItr; + + AdjEdgeList adjEdges; + adjEdges.reserve(nodeEntry.getDegree()); + std::copy(nodeEntry.adjEdgesBegin(), nodeEntry.adjEdgesEnd(), + std::back_inserter(adjEdges)); + + // Iterate over the copied out edges and remove them from the graph. + for (AdjEdgeListItr itr = adjEdges.begin(), end = adjEdges.end(); + itr != end; ++itr) { + removeEdge(*itr); + } + + // Erase the node from the nodelist. + nodeList.erase(nodeItr); + --nodeListSize; + } + + NodeIterator nodesBegin() { return nodeList.begin(); } + ConstNodeIterator nodesBegin() const { return nodeList.begin(); } + NodeIterator nodesEnd() { return nodeList.end(); } + ConstNodeIterator nodesEnd() const { return nodeList.end(); } + + AdjEdgeIterator adjEdgesBegin(const NodeIterator &nodeItr) { + return getNodeEntry(nodeItr).adjEdgesBegin(); + } + + ConstAdjEdgeIterator adjEdgesBegin(const ConstNodeIterator &nodeItr) const { + return getNodeEntry(nodeItr).adjEdgesBegin(); + } + + AdjEdgeIterator adjEdgesEnd(const NodeIterator &nodeItr) { + return getNodeEntry(nodeItr).adjEdgesEnd(); + } + + ConstAdjEdgeIterator adjEdgesEnd(const ConstNodeIterator &nodeItr) const { + getNodeEntry(nodeItr).adjEdgesEnd(); + } + + EdgeIterator findEdge(const NodeIterator &node1Itr, + const NodeIterator &node2Itr) { + + for (AdjEdgeIterator adjEdgeItr = adjEdgesBegin(node1Itr), + adjEdgeEnd = adjEdgesEnd(node1Itr); + adjEdgeItr != adjEdgeEnd; ++adjEdgeItr) { + if ((getEdgeNode1Itr(*adjEdgeItr) == node2Itr) || + (getEdgeNode2Itr(*adjEdgeItr) == node2Itr)) { + return *adjEdgeItr; + } + } + + return edgeList.end(); + } + + ConstEdgeIterator findEdge(const ConstNodeIterator &node1Itr, + const ConstNodeIterator &node2Itr) const { + + for (ConstAdjEdgeIterator adjEdgeItr = adjEdgesBegin(node1Itr), + adjEdgeEnd = adjEdgesEnd(node1Itr); + adjEdgeItr != adjEdgesEnd; ++adjEdgeItr) { + if ((getEdgeNode1Itr(*adjEdgeItr) == node2Itr) || + 
(getEdgeNode2Itr(*adjEdgeItr) == node2Itr)) { + return *adjEdgeItr; + } + } + + return edgeList.end(); + } + + Matrix& getEdgeCosts(const EdgeIterator &edgeItr) { + return getEdgeEntry(edgeItr).getCosts(); + } + + const Matrix& getEdgeCosts(const ConstEdgeIterator &edgeItr) const { + return getEdgeEntry(edgeItr).getCosts(); + } + + NodeIterator getEdgeNode1Itr(const EdgeIterator &edgeItr) { + return getEdgeEntry(edgeItr).getNode1Itr(); + } + + ConstNodeIterator getEdgeNode1Itr(const ConstEdgeIterator &edgeItr) const { + return getEdgeEntry(edgeItr).getNode1Itr(); + } + + NodeIterator getEdgeNode2Itr(const EdgeIterator &edgeItr) { + return getEdgeEntry(edgeItr).getNode2Itr(); + } + + ConstNodeIterator getEdgeNode2Itr(const ConstEdgeIterator &edgeItr) const { + return getEdgeEntry(edgeItr).getNode2Itr(); + } + + NodeIterator getEdgeOtherNode(const EdgeIterator &edgeItr, + const NodeIterator &nodeItr) { + + EdgeEntry &edgeEntry = getEdgeEntry(edgeItr); + if (nodeItr == edgeEntry.getNode1Itr()) { + return edgeEntry.getNode2Itr(); + } + //else + return edgeEntry.getNode1Itr(); + } + + ConstNodeIterator getEdgeOtherNode(const ConstEdgeIterator &edgeItr, + const ConstNodeIterator &nodeItr) const { + + const EdgeEntry &edgeEntry = getEdgeEntry(edgeItr); + if (nodeItr == edgeEntry.getNode1Itr()) { + return edgeEntry.getNode2Itr(); + } + //else + return edgeEntry.getNode1Itr(); + } + + void removeEdge(const EdgeIterator &edgeItr) { + assert(iteratorInRange(edgeItr, edgeList.begin(), edgeList.end()) && + "Iterator does not belong to this graph!"); + + --edgeListSize; + + // Get the edge entry. + EdgeEntry &edgeEntry = getEdgeEntry(edgeItr); + + // Get the nodes entry. + NodeEntry &node1Entry(getNodeEntry(edgeEntry.getNode1Itr())), + &node2Entry(getNodeEntry(edgeEntry.getNode2Itr())); + + // Disconnect the edge from the nodes. 
+ node1Entry.removeAdjEdge(edgeEntry.getNode1ThisEdgeItr()); + node2Entry.removeAdjEdge(edgeEntry.getNode2ThisEdgeItr()); + + // Remove the edge from the graph. + edgeList.erase(edgeItr); + } + + EdgeIterator edgesBegin() { return edgeList.begin(); } + ConstEdgeIterator edgesBegin() const { return edgeList.begin(); } + EdgeIterator edgesEnd() { return edgeList.end(); } + ConstEdgeIterator edgesEnd() const { return edgeList.end(); } + + void clear() { + nodeList.clear(); + nodeListSize = 0; + edgeList.clear(); + edgeListSize = 0; + idToNodeMap.clear(); + } + + template <typename OStream> + void printDot(OStream &os) const { + + assert(areNodeIDsValid() && + "Cannot print a .dot of a graph unless IDs have been assigned."); + + os << "graph {\n"; + + for (ConstNodeIterator nodeItr = nodesBegin(), nodeEnd = nodesEnd(); + nodeItr != nodeEnd; ++nodeItr) { + + os << " node" << getNodeID(nodeItr) << " [ label=\"" + << getNodeID(nodeItr) << ": " << getNodeCosts(nodeItr) << "\" ]\n"; + } + + os << " edge [ len=" << getNumNodes() << " ]\n"; + + for (ConstEdgeIterator edgeItr = edgesBegin(), edgeEnd = edgesEnd(); + edgeItr != edgeEnd; ++edgeItr) { + + os << " node" << getNodeID(getEdgeNode1Itr(edgeItr)) + << " -- node" << getNodeID(getEdgeNode2Itr(edgeItr)) + << " [ label=\""; + + const Matrix &edgeCosts = getEdgeCosts(edgeItr); + + for (unsigned i = 0; i < edgeCosts.getRows(); ++i) { + os << edgeCosts.getRowAsVector(i) << "\\n"; + } + + os << "\" ]\n"; + } + + os << "}\n"; + } + + template <typename OStream> + void printDot(OStream &os) { + if (!areNodeIDsValid()) { + assignNodeIDs(); + } + + const_cast<const ThisGraphT*>(this)->printDot(os); + } + + template <typename OStream> + void dumpTo(OStream &os) const { + typedef ConstNodeIterator ConstNodeID; + + assert(areNodeIDsValid() && + "Cannot dump a graph unless IDs have been assigned."); + + for (ConstNodeIterator nItr = nodesBegin(), nEnd = nodesEnd(); + nItr != nEnd; ++nItr) { + os << getNodeID(nItr) << "\n"; + } + + 
unsigned edgeNumber = 1; + for (ConstEdgeIterator eItr = edgesBegin(), eEnd = edgesEnd(); + eItr != eEnd; ++eItr) { + + os << edgeNumber++ << ": { " + << getNodeID(getEdgeNode1Itr(eItr)) << ", " + << getNodeID(getEdgeNode2Itr(eItr)) << " }\n"; + } + + } + + template <typename OStream> + void dumpTo(OStream &os) { + if (!areNodeIDsValid()) { + assignNodeIDs(); + } + + const_cast<const ThisGraphT*>(this)->dumpTo(os); + } + +}; + +/// \brief Provides a base from which to derive nodes for GraphBase. +template <typename NodeImpl, typename EdgeImpl> +class NodeBase { +private: + + typedef GraphBase<NodeImpl, EdgeImpl> GraphBaseT; + typedef NodeBaseTraits<GraphBaseT> ThisNodeBaseTraits; + +public: + typedef typename GraphBaseT::EdgeIterator EdgeIterator; + +private: + typedef typename ThisNodeBaseTraits::AdjEdgeList AdjEdgeList; + + unsigned degree, id; + Vector costs; + AdjEdgeList adjEdges; + + void operator=(const NodeBase& other) { + assert(false && "Can't assign NodeEntrys."); + } + +public: + + typedef typename ThisNodeBaseTraits::AdjEdgeIterator AdjEdgeIterator; + typedef typename ThisNodeBaseTraits::ConstAdjEdgeIterator + ConstAdjEdgeIterator; + + NodeBase(const Vector &costs) : degree(0), costs(costs) { + assert((costs.getLength() > 0) && "Can't have zero-length cost vector."); + } + + Vector& getCosts() { return costs; } + const Vector& getCosts() const { return costs; } + + unsigned getDegree() const { return degree; } + + void setID(unsigned id) { this->id = id; } + unsigned getID() const { return id; } + + AdjEdgeIterator addAdjEdge(const EdgeIterator &edgeItr) { + ++degree; + return adjEdges.insert(adjEdges.end(), edgeItr); + } + + void removeAdjEdge(const AdjEdgeIterator &adjEdgeItr) { + --degree; + adjEdges.erase(adjEdgeItr); + } + + AdjEdgeIterator adjEdgesBegin() { return adjEdges.begin(); } + ConstAdjEdgeIterator adjEdgesBegin() const { return adjEdges.begin(); } + AdjEdgeIterator adjEdgesEnd() { return adjEdges.end(); } + ConstAdjEdgeIterator 
adjEdgesEnd() const { return adjEdges.end(); } + +}; + +template <typename NodeImpl, typename EdgeImpl> +class EdgeBase { +public: + typedef typename GraphBase<NodeImpl, EdgeImpl>::NodeIterator NodeIterator; + typedef typename GraphBase<NodeImpl, EdgeImpl>::EdgeIterator EdgeIterator; + + typedef typename NodeImpl::AdjEdgeIterator NodeAdjEdgeIterator; + +private: + + NodeIterator node1Itr, node2Itr; + NodeAdjEdgeIterator node1ThisEdgeItr, node2ThisEdgeItr; + Matrix costs; + + void operator=(const EdgeBase &other) { + assert(false && "Can't assign EdgeEntrys."); + } + +public: + + EdgeBase(const NodeIterator &node1Itr, const NodeIterator &node2Itr, + const Matrix &costs) : + node1Itr(node1Itr), node2Itr(node2Itr), costs(costs) { + + assert((costs.getRows() > 0) && (costs.getCols() > 0) && + "Can't have zero-dimensioned cost matrices"); + } + + Matrix& getCosts() { return costs; } + const Matrix& getCosts() const { return costs; } + + const NodeIterator& getNode1Itr() const { return node1Itr; } + const NodeIterator& getNode2Itr() const { return node2Itr; } + + void setNode1ThisEdgeItr(const NodeAdjEdgeIterator &node1ThisEdgeItr) { + this->node1ThisEdgeItr = node1ThisEdgeItr; + } + + const NodeAdjEdgeIterator& getNode1ThisEdgeItr() const { + return node1ThisEdgeItr; + } + + void setNode2ThisEdgeItr(const NodeAdjEdgeIterator &node2ThisEdgeItr) { + this->node2ThisEdgeItr = node2ThisEdgeItr; + } + + const NodeAdjEdgeIterator& getNode2ThisEdgeItr() const { + return node2ThisEdgeItr; + } + +}; + + +} + +#endif // LLVM_CODEGEN_PBQP_GRAPHBASE_HPP diff --git a/lib/CodeGen/PBQP/HeuristicSolver.h b/lib/CodeGen/PBQP/HeuristicSolver.h new file mode 100644 index 0000000..e786246 --- /dev/null +++ b/lib/CodeGen/PBQP/HeuristicSolver.h @@ -0,0 +1,789 @@ +//===-- HeuristicSolver.h - Heuristic PBQP Solver --------------*- C++ --*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. 
See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Heuristic PBQP solver. This solver is able to perform optimal reductions for +// nodes of degree 0, 1 or 2. For nodes of degree >2 a plugable heuristic is +// used to to select a node for reduction. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_PBQP_HEURISTICSOLVER_H +#define LLVM_CODEGEN_PBQP_HEURISTICSOLVER_H + +#include "Solver.h" +#include "AnnotatedGraph.h" +#include "llvm/Support/raw_ostream.h" +#include <limits> + +namespace PBQP { + +/// \brief Important types for the HeuristicSolverImpl. +/// +/// Declared seperately to allow access to heuristic classes before the solver +/// is fully constructed. +template <typename HeuristicNodeData, typename HeuristicEdgeData> +class HSITypes { +public: + + class NodeData; + class EdgeData; + + typedef AnnotatedGraph<NodeData, EdgeData> SolverGraph; + typedef typename SolverGraph::NodeIterator GraphNodeIterator; + typedef typename SolverGraph::EdgeIterator GraphEdgeIterator; + typedef typename SolverGraph::AdjEdgeIterator GraphAdjEdgeIterator; + + typedef std::list<GraphNodeIterator> NodeList; + typedef typename NodeList::iterator NodeListIterator; + + typedef std::vector<GraphNodeIterator> NodeStack; + typedef typename NodeStack::iterator NodeStackIterator; + + class NodeData { + friend class EdgeData; + + private: + + typedef std::list<GraphEdgeIterator> LinksList; + + unsigned numLinks; + LinksList links, solvedLinks; + NodeListIterator bucketItr; + HeuristicNodeData heuristicData; + + public: + + typedef typename LinksList::iterator AdjLinkIterator; + + private: + + AdjLinkIterator addLink(const GraphEdgeIterator &edgeItr) { + ++numLinks; + return links.insert(links.end(), edgeItr); + } + + void delLink(const AdjLinkIterator &adjLinkItr) { + --numLinks; + links.erase(adjLinkItr); + } + + public: + + NodeData() : numLinks(0) {} + + 
unsigned getLinkDegree() const { return numLinks; } + + HeuristicNodeData& getHeuristicData() { return heuristicData; } + const HeuristicNodeData& getHeuristicData() const { + return heuristicData; + } + + void setBucketItr(const NodeListIterator &bucketItr) { + this->bucketItr = bucketItr; + } + + const NodeListIterator& getBucketItr() const { + return bucketItr; + } + + AdjLinkIterator adjLinksBegin() { + return links.begin(); + } + + AdjLinkIterator adjLinksEnd() { + return links.end(); + } + + void addSolvedLink(const GraphEdgeIterator &solvedLinkItr) { + solvedLinks.push_back(solvedLinkItr); + } + + AdjLinkIterator solvedLinksBegin() { + return solvedLinks.begin(); + } + + AdjLinkIterator solvedLinksEnd() { + return solvedLinks.end(); + } + + }; + + class EdgeData { + private: + + SolverGraph &g; + GraphNodeIterator node1Itr, node2Itr; + HeuristicEdgeData heuristicData; + typename NodeData::AdjLinkIterator node1ThisEdgeItr, node2ThisEdgeItr; + + public: + + EdgeData(SolverGraph &g) : g(g) {} + + HeuristicEdgeData& getHeuristicData() { return heuristicData; } + const HeuristicEdgeData& getHeuristicData() const { + return heuristicData; + } + + void setup(const GraphEdgeIterator &thisEdgeItr) { + node1Itr = g.getEdgeNode1Itr(thisEdgeItr); + node2Itr = g.getEdgeNode2Itr(thisEdgeItr); + + node1ThisEdgeItr = g.getNodeData(node1Itr).addLink(thisEdgeItr); + node2ThisEdgeItr = g.getNodeData(node2Itr).addLink(thisEdgeItr); + } + + void unlink() { + g.getNodeData(node1Itr).delLink(node1ThisEdgeItr); + g.getNodeData(node2Itr).delLink(node2ThisEdgeItr); + } + + }; + +}; + +template <typename Heuristic> +class HeuristicSolverImpl { +public: + // Typedefs to make life easier: + typedef HSITypes<typename Heuristic::NodeData, + typename Heuristic::EdgeData> HSIT; + typedef typename HSIT::SolverGraph SolverGraph; + typedef typename HSIT::NodeData NodeData; + typedef typename HSIT::EdgeData EdgeData; + typedef typename HSIT::GraphNodeIterator GraphNodeIterator; + typedef 
typename HSIT::GraphEdgeIterator GraphEdgeIterator; + typedef typename HSIT::GraphAdjEdgeIterator GraphAdjEdgeIterator; + + typedef typename HSIT::NodeList NodeList; + typedef typename HSIT::NodeListIterator NodeListIterator; + + typedef std::vector<GraphNodeIterator> NodeStack; + typedef typename NodeStack::iterator NodeStackIterator; + + /// \brief Constructor, which performs all the actual solver work. + HeuristicSolverImpl(const SimpleGraph &orig) : + solution(orig.getNumNodes(), true) + { + copyGraph(orig); + simplify(); + setup(); + computeSolution(); + computeSolutionCost(orig); + } + + /// \brief Returns the graph for this solver. + SolverGraph& getGraph() { return g; } + + /// \brief Return the solution found by this solver. + const Solution& getSolution() const { return solution; } + +private: + + /// \brief Add the given node to the appropriate bucket for its link + /// degree. + void addToBucket(const GraphNodeIterator &nodeItr) { + NodeData &nodeData = g.getNodeData(nodeItr); + + switch (nodeData.getLinkDegree()) { + case 0: nodeData.setBucketItr( + r0Bucket.insert(r0Bucket.end(), nodeItr)); + break; + case 1: nodeData.setBucketItr( + r1Bucket.insert(r1Bucket.end(), nodeItr)); + break; + case 2: nodeData.setBucketItr( + r2Bucket.insert(r2Bucket.end(), nodeItr)); + break; + default: heuristic.addToRNBucket(nodeItr); + break; + } + } + + /// \brief Remove the given node from the appropriate bucket for its link + /// degree. + void removeFromBucket(const GraphNodeIterator &nodeItr) { + NodeData &nodeData = g.getNodeData(nodeItr); + + switch (nodeData.getLinkDegree()) { + case 0: r0Bucket.erase(nodeData.getBucketItr()); break; + case 1: r1Bucket.erase(nodeData.getBucketItr()); break; + case 2: r2Bucket.erase(nodeData.getBucketItr()); break; + default: heuristic.removeFromRNBucket(nodeItr); break; + } + } + +public: + + /// \brief Add a link. 
+ void addLink(const GraphEdgeIterator &edgeItr) { + g.getEdgeData(edgeItr).setup(edgeItr); + + if ((g.getNodeData(g.getEdgeNode1Itr(edgeItr)).getLinkDegree() > 2) || + (g.getNodeData(g.getEdgeNode2Itr(edgeItr)).getLinkDegree() > 2)) { + heuristic.handleAddLink(edgeItr); + } + } + + /// \brief Remove link, update info for node. + /// + /// Only updates information for the given node, since usually the other + /// is about to be removed. + void removeLink(const GraphEdgeIterator &edgeItr, + const GraphNodeIterator &nodeItr) { + + if (g.getNodeData(nodeItr).getLinkDegree() > 2) { + heuristic.handleRemoveLink(edgeItr, nodeItr); + } + g.getEdgeData(edgeItr).unlink(); + } + + /// \brief Remove link, update info for both nodes. Useful for R2 only. + void removeLinkR2(const GraphEdgeIterator &edgeItr) { + GraphNodeIterator node1Itr = g.getEdgeNode1Itr(edgeItr); + + if (g.getNodeData(node1Itr).getLinkDegree() > 2) { + heuristic.handleRemoveLink(edgeItr, node1Itr); + } + removeLink(edgeItr, g.getEdgeNode2Itr(edgeItr)); + } + + /// \brief Removes all links connected to the given node. + void unlinkNode(const GraphNodeIterator &nodeItr) { + NodeData &nodeData = g.getNodeData(nodeItr); + + typedef std::vector<GraphEdgeIterator> TempEdgeList; + + TempEdgeList edgesToUnlink; + edgesToUnlink.reserve(nodeData.getLinkDegree()); + + // Copy adj edges into a temp vector. We want to destroy them during + // the unlink, and we can't do that while we're iterating over them. + std::copy(nodeData.adjLinksBegin(), nodeData.adjLinksEnd(), + std::back_inserter(edgesToUnlink)); + + for (typename TempEdgeList::iterator + edgeItr = edgesToUnlink.begin(), edgeEnd = edgesToUnlink.end(); + edgeItr != edgeEnd; ++edgeItr) { + + GraphNodeIterator otherNode = g.getEdgeOtherNode(*edgeItr, nodeItr); + + removeFromBucket(otherNode); + removeLink(*edgeItr, otherNode); + addToBucket(otherNode); + } + } + + /// \brief Push the given node onto the stack to be solved with + /// backpropagation. 
+ void pushStack(const GraphNodeIterator &nodeItr) { + stack.push_back(nodeItr); + } + + /// \brief Set the solution of the given node. + void setSolution(const GraphNodeIterator &nodeItr, unsigned solIndex) { + solution.setSelection(g.getNodeID(nodeItr), solIndex); + + for (GraphAdjEdgeIterator adjEdgeItr = g.adjEdgesBegin(nodeItr), + adjEdgeEnd = g.adjEdgesEnd(nodeItr); + adjEdgeItr != adjEdgeEnd; ++adjEdgeItr) { + GraphEdgeIterator edgeItr(*adjEdgeItr); + GraphNodeIterator adjNodeItr(g.getEdgeOtherNode(edgeItr, nodeItr)); + g.getNodeData(adjNodeItr).addSolvedLink(edgeItr); + } + } + +private: + + SolverGraph g; + Heuristic heuristic; + Solution solution; + + NodeList r0Bucket, + r1Bucket, + r2Bucket; + + NodeStack stack; + + // Copy the SimpleGraph into an annotated graph which we can use for reduction. + void copyGraph(const SimpleGraph &orig) { + + assert((g.getNumEdges() == 0) && (g.getNumNodes() == 0) && + "Graph should be empty prior to solver setup."); + + assert(orig.areNodeIDsValid() && + "Cannot copy from a graph with invalid node IDs."); + + std::vector<GraphNodeIterator> newNodeItrs; + + for (unsigned nodeID = 0; nodeID < orig.getNumNodes(); ++nodeID) { + newNodeItrs.push_back( + g.addNode(orig.getNodeCosts(orig.getNodeItr(nodeID)), NodeData())); + } + + for (SimpleGraph::ConstEdgeIterator + origEdgeItr = orig.edgesBegin(), origEdgeEnd = orig.edgesEnd(); + origEdgeItr != origEdgeEnd; ++origEdgeItr) { + + unsigned id1 = orig.getNodeID(orig.getEdgeNode1Itr(origEdgeItr)), + id2 = orig.getNodeID(orig.getEdgeNode2Itr(origEdgeItr)); + + g.addEdge(newNodeItrs[id1], newNodeItrs[id2], + orig.getEdgeCosts(origEdgeItr), EdgeData(g)); + } + + // Assign IDs to the new nodes using the ordering from the old graph, + // this will lead to nodes in the new graph getting the same ID as the + // corresponding node in the old graph. + g.assignNodeIDs(newNodeItrs); + } + + // Simplify the annotated graph by eliminating independent edges and trivial + // nodes. 
+ void simplify() { + disconnectTrivialNodes(); + eliminateIndependentEdges(); + } + + // Eliminate trivial nodes. + void disconnectTrivialNodes() { + for (GraphNodeIterator nodeItr = g.nodesBegin(), nodeEnd = g.nodesEnd(); + nodeItr != nodeEnd; ++nodeItr) { + + if (g.getNodeCosts(nodeItr).getLength() == 1) { + + std::vector<GraphEdgeIterator> edgesToRemove; + + for (GraphAdjEdgeIterator adjEdgeItr = g.adjEdgesBegin(nodeItr), + adjEdgeEnd = g.adjEdgesEnd(nodeItr); + adjEdgeItr != adjEdgeEnd; ++adjEdgeItr) { + + GraphEdgeIterator edgeItr = *adjEdgeItr; + + if (g.getEdgeNode1Itr(edgeItr) == nodeItr) { + GraphNodeIterator otherNodeItr = g.getEdgeNode2Itr(edgeItr); + g.getNodeCosts(otherNodeItr) += + g.getEdgeCosts(edgeItr).getRowAsVector(0); + } + else { + GraphNodeIterator otherNodeItr = g.getEdgeNode1Itr(edgeItr); + g.getNodeCosts(otherNodeItr) += + g.getEdgeCosts(edgeItr).getColAsVector(0); + } + + edgesToRemove.push_back(edgeItr); + } + + while (!edgesToRemove.empty()) { + g.removeEdge(edgesToRemove.back()); + edgesToRemove.pop_back(); + } + } + } + } + + void eliminateIndependentEdges() { + std::vector<GraphEdgeIterator> edgesToProcess; + + for (GraphEdgeIterator edgeItr = g.edgesBegin(), edgeEnd = g.edgesEnd(); + edgeItr != edgeEnd; ++edgeItr) { + edgesToProcess.push_back(edgeItr); + } + + while (!edgesToProcess.empty()) { + tryToEliminateEdge(edgesToProcess.back()); + edgesToProcess.pop_back(); + } + } + + void tryToEliminateEdge(const GraphEdgeIterator &edgeItr) { + if (tryNormaliseEdgeMatrix(edgeItr)) { + g.removeEdge(edgeItr); + } + } + + bool tryNormaliseEdgeMatrix(const GraphEdgeIterator &edgeItr) { + + Matrix &edgeCosts = g.getEdgeCosts(edgeItr); + Vector &uCosts = g.getNodeCosts(g.getEdgeNode1Itr(edgeItr)), + &vCosts = g.getNodeCosts(g.getEdgeNode2Itr(edgeItr)); + + for (unsigned r = 0; r < edgeCosts.getRows(); ++r) { + PBQPNum rowMin = edgeCosts.getRowMin(r); + uCosts[r] += rowMin; + if (rowMin != std::numeric_limits<PBQPNum>::infinity()) { + 
edgeCosts.subFromRow(r, rowMin); + } + else { + edgeCosts.setRow(r, 0); + } + } + + for (unsigned c = 0; c < edgeCosts.getCols(); ++c) { + PBQPNum colMin = edgeCosts.getColMin(c); + vCosts[c] += colMin; + if (colMin != std::numeric_limits<PBQPNum>::infinity()) { + edgeCosts.subFromCol(c, colMin); + } + else { + edgeCosts.setCol(c, 0); + } + } + + return edgeCosts.isZero(); + } + + void setup() { + setupLinks(); + heuristic.initialise(*this); + setupBuckets(); + } + + void setupLinks() { + for (GraphEdgeIterator edgeItr = g.edgesBegin(), edgeEnd = g.edgesEnd(); + edgeItr != edgeEnd; ++edgeItr) { + g.getEdgeData(edgeItr).setup(edgeItr); + } + } + + void setupBuckets() { + for (GraphNodeIterator nodeItr = g.nodesBegin(), nodeEnd = g.nodesEnd(); + nodeItr != nodeEnd; ++nodeItr) { + addToBucket(nodeItr); + } + } + + void computeSolution() { + assert(g.areNodeIDsValid() && + "Nodes cannot be added/removed during reduction."); + + reduce(); + computeTrivialSolutions(); + backpropagate(); + } + + void printNode(const GraphNodeIterator &nodeItr) { + llvm::errs() << "Node " << g.getNodeID(nodeItr) << " (" << &*nodeItr << "):\n" + << " costs = " << g.getNodeCosts(nodeItr) << "\n" + << " link degree = " << g.getNodeData(nodeItr).getLinkDegree() << "\n" + << " links = [ "; + + for (typename HSIT::NodeData::AdjLinkIterator + aeItr = g.getNodeData(nodeItr).adjLinksBegin(), + aeEnd = g.getNodeData(nodeItr).adjLinksEnd(); + aeItr != aeEnd; ++aeItr) { + llvm::errs() << "(" << g.getNodeID(g.getEdgeNode1Itr(*aeItr)) + << ", " << g.getNodeID(g.getEdgeNode2Itr(*aeItr)) + << ") "; + } + llvm::errs() << "]\n"; + } + + void dumpState() { + llvm::errs() << "\n"; + + for (GraphNodeIterator nodeItr = g.nodesBegin(), nodeEnd = g.nodesEnd(); + nodeItr != nodeEnd; ++nodeItr) { + printNode(nodeItr); + } + + NodeList* buckets[] = { &r0Bucket, &r1Bucket, &r2Bucket }; + + for (unsigned b = 0; b < 3; ++b) { + NodeList &bucket = *buckets[b]; + + llvm::errs() << "Bucket " << b << ": [ "; + + for 
(NodeListIterator nItr = bucket.begin(), nEnd = bucket.end(); + nItr != nEnd; ++nItr) { + llvm::errs() << g.getNodeID(*nItr) << " "; + } + + llvm::errs() << "]\n"; + } + + llvm::errs() << "Stack: [ "; + for (NodeStackIterator nsItr = stack.begin(), nsEnd = stack.end(); + nsItr != nsEnd; ++nsItr) { + llvm::errs() << g.getNodeID(*nsItr) << " "; + } + llvm::errs() << "]\n"; + } + + void reduce() { + bool reductionFinished = r1Bucket.empty() && r2Bucket.empty() && + heuristic.rNBucketEmpty(); + + while (!reductionFinished) { + + if (!r1Bucket.empty()) { + processR1(); + } + else if (!r2Bucket.empty()) { + processR2(); + } + else if (!heuristic.rNBucketEmpty()) { + solution.setProvedOptimal(false); + solution.incRNReductions(); + heuristic.processRN(); + } + else reductionFinished = true; + } + + }; + + void processR1() { + + // Remove the first node in the R0 bucket: + GraphNodeIterator xNodeItr = r1Bucket.front(); + r1Bucket.pop_front(); + + solution.incR1Reductions(); + + //llvm::errs() << "Applying R1 to " << g.getNodeID(xNodeItr) << "\n"; + + assert((g.getNodeData(xNodeItr).getLinkDegree() == 1) && + "Node in R1 bucket has degree != 1"); + + GraphEdgeIterator edgeItr = *g.getNodeData(xNodeItr).adjLinksBegin(); + + const Matrix &edgeCosts = g.getEdgeCosts(edgeItr); + + const Vector &xCosts = g.getNodeCosts(xNodeItr); + unsigned xLen = xCosts.getLength(); + + // Duplicate a little code to avoid transposing matrices: + if (xNodeItr == g.getEdgeNode1Itr(edgeItr)) { + GraphNodeIterator yNodeItr = g.getEdgeNode2Itr(edgeItr); + Vector &yCosts = g.getNodeCosts(yNodeItr); + unsigned yLen = yCosts.getLength(); + + for (unsigned j = 0; j < yLen; ++j) { + PBQPNum min = edgeCosts[0][j] + xCosts[0]; + for (unsigned i = 1; i < xLen; ++i) { + PBQPNum c = edgeCosts[i][j] + xCosts[i]; + if (c < min) + min = c; + } + yCosts[j] += min; + } + } + else { + GraphNodeIterator yNodeItr = g.getEdgeNode1Itr(edgeItr); + Vector &yCosts = g.getNodeCosts(yNodeItr); + unsigned yLen = 
yCosts.getLength(); + + for (unsigned i = 0; i < yLen; ++i) { + PBQPNum min = edgeCosts[i][0] + xCosts[0]; + + for (unsigned j = 1; j < xLen; ++j) { + PBQPNum c = edgeCosts[i][j] + xCosts[j]; + if (c < min) + min = c; + } + yCosts[i] += min; + } + } + + unlinkNode(xNodeItr); + pushStack(xNodeItr); + } + + void processR2() { + + GraphNodeIterator xNodeItr = r2Bucket.front(); + r2Bucket.pop_front(); + + solution.incR2Reductions(); + + // Unlink is unsafe here. At some point it may optimistically more a node + // to a lower-degree list when its degree will later rise, or vice versa, + // violating the assumption that node degrees monotonically decrease + // during the reduction phase. Instead we'll bucket shuffle manually. + pushStack(xNodeItr); + + assert((g.getNodeData(xNodeItr).getLinkDegree() == 2) && + "Node in R2 bucket has degree != 2"); + + const Vector &xCosts = g.getNodeCosts(xNodeItr); + + typename NodeData::AdjLinkIterator tempItr = + g.getNodeData(xNodeItr).adjLinksBegin(); + + GraphEdgeIterator yxEdgeItr = *tempItr, + zxEdgeItr = *(++tempItr); + + GraphNodeIterator yNodeItr = g.getEdgeOtherNode(yxEdgeItr, xNodeItr), + zNodeItr = g.getEdgeOtherNode(zxEdgeItr, xNodeItr); + + removeFromBucket(yNodeItr); + removeFromBucket(zNodeItr); + + removeLink(yxEdgeItr, yNodeItr); + removeLink(zxEdgeItr, zNodeItr); + + // Graph some of the costs: + bool flipEdge1 = (g.getEdgeNode1Itr(yxEdgeItr) == xNodeItr), + flipEdge2 = (g.getEdgeNode1Itr(zxEdgeItr) == xNodeItr); + + const Matrix *yxCosts = flipEdge1 ? + new Matrix(g.getEdgeCosts(yxEdgeItr).transpose()) : + &g.getEdgeCosts(yxEdgeItr), + *zxCosts = flipEdge2 ? 
+ new Matrix(g.getEdgeCosts(zxEdgeItr).transpose()) : + &g.getEdgeCosts(zxEdgeItr); + + unsigned xLen = xCosts.getLength(), + yLen = yxCosts->getRows(), + zLen = zxCosts->getRows(); + + // Compute delta: + Matrix delta(yLen, zLen); + + for (unsigned i = 0; i < yLen; ++i) { + for (unsigned j = 0; j < zLen; ++j) { + PBQPNum min = (*yxCosts)[i][0] + (*zxCosts)[j][0] + xCosts[0]; + for (unsigned k = 1; k < xLen; ++k) { + PBQPNum c = (*yxCosts)[i][k] + (*zxCosts)[j][k] + xCosts[k]; + if (c < min) { + min = c; + } + } + delta[i][j] = min; + } + } + + if (flipEdge1) + delete yxCosts; + + if (flipEdge2) + delete zxCosts; + + // Deal with the potentially induced yz edge. + GraphEdgeIterator yzEdgeItr = g.findEdge(yNodeItr, zNodeItr); + if (yzEdgeItr == g.edgesEnd()) { + yzEdgeItr = g.addEdge(yNodeItr, zNodeItr, delta, EdgeData(g)); + } + else { + // There was an edge, but we're going to screw with it. Delete the old + // link, update the costs. We'll re-link it later. + removeLinkR2(yzEdgeItr); + g.getEdgeCosts(yzEdgeItr) += + (yNodeItr == g.getEdgeNode1Itr(yzEdgeItr)) ? + delta : delta.transpose(); + } + + bool nullCostEdge = tryNormaliseEdgeMatrix(yzEdgeItr); + + // Nulled the edge, remove it entirely. + if (nullCostEdge) { + g.removeEdge(yzEdgeItr); + } + else { + // Edge remains - re-link it. + addLink(yzEdgeItr); + } + + addToBucket(yNodeItr); + addToBucket(zNodeItr); + } + + void computeTrivialSolutions() { + + for (NodeListIterator r0Itr = r0Bucket.begin(), r0End = r0Bucket.end(); + r0Itr != r0End; ++r0Itr) { + GraphNodeIterator nodeItr = *r0Itr; + + solution.incR0Reductions(); + setSolution(nodeItr, g.getNodeCosts(nodeItr).minIndex()); + } + + } + + void backpropagate() { + while (!stack.empty()) { + computeSolution(stack.back()); + stack.pop_back(); + } + } + + void computeSolution(const GraphNodeIterator &nodeItr) { + + NodeData &nodeData = g.getNodeData(nodeItr); + + Vector v(g.getNodeCosts(nodeItr)); + + // Solve based on existing links. 
+ for (typename NodeData::AdjLinkIterator + solvedLinkItr = nodeData.solvedLinksBegin(), + solvedLinkEnd = nodeData.solvedLinksEnd(); + solvedLinkItr != solvedLinkEnd; ++solvedLinkItr) { + + GraphEdgeIterator solvedEdgeItr(*solvedLinkItr); + Matrix &edgeCosts = g.getEdgeCosts(solvedEdgeItr); + + if (nodeItr == g.getEdgeNode1Itr(solvedEdgeItr)) { + GraphNodeIterator adjNode(g.getEdgeNode2Itr(solvedEdgeItr)); + unsigned adjSolution = + solution.getSelection(g.getNodeID(adjNode)); + v += edgeCosts.getColAsVector(adjSolution); + } + else { + GraphNodeIterator adjNode(g.getEdgeNode1Itr(solvedEdgeItr)); + unsigned adjSolution = + solution.getSelection(g.getNodeID(adjNode)); + v += edgeCosts.getRowAsVector(adjSolution); + } + + } + + setSolution(nodeItr, v.minIndex()); + } + + void computeSolutionCost(const SimpleGraph &orig) { + PBQPNum cost = 0.0; + + for (SimpleGraph::ConstNodeIterator + nodeItr = orig.nodesBegin(), nodeEnd = orig.nodesEnd(); + nodeItr != nodeEnd; ++nodeItr) { + + unsigned nodeId = orig.getNodeID(nodeItr); + + cost += orig.getNodeCosts(nodeItr)[solution.getSelection(nodeId)]; + } + + for (SimpleGraph::ConstEdgeIterator + edgeItr = orig.edgesBegin(), edgeEnd = orig.edgesEnd(); + edgeItr != edgeEnd; ++edgeItr) { + + SimpleGraph::ConstNodeIterator n1 = orig.getEdgeNode1Itr(edgeItr), + n2 = orig.getEdgeNode2Itr(edgeItr); + unsigned sol1 = solution.getSelection(orig.getNodeID(n1)), + sol2 = solution.getSelection(orig.getNodeID(n2)); + + cost += orig.getEdgeCosts(edgeItr)[sol1][sol2]; + } + + solution.setSolutionCost(cost); + } + +}; + +template <typename Heuristic> +class HeuristicSolver : public Solver { +public: + Solution solve(const SimpleGraph &g) const { + HeuristicSolverImpl<Heuristic> solverImpl(g); + return solverImpl.getSolution(); + } +}; + +} + +#endif // LLVM_CODEGEN_PBQP_HEURISTICSOLVER_H diff --git a/lib/CodeGen/PBQP/Heuristics/Briggs.h b/lib/CodeGen/PBQP/Heuristics/Briggs.h new file mode 100644 index 0000000..3ac9e70 --- /dev/null +++ 
b/lib/CodeGen/PBQP/Heuristics/Briggs.h @@ -0,0 +1,383 @@ +//===-- Briggs.h --- Briggs Heuristic for PBQP -----------------*- C++ --*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class implements the Briggs test for "allocability" of nodes in a +// PBQP graph representing a register allocation problem. Nodes which can be +// proven allocable (by a safe and relatively accurate test) are removed from +// the PBQP graph first. If no provably allocable node is present in the graph +// then the node with the minimal spill-cost to degree ratio is removed. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_PBQP_HEURISTICS_BRIGGS_H +#define LLVM_CODEGEN_PBQP_HEURISTICS_BRIGGS_H + +#include "../HeuristicSolver.h" + +#include <set> + +namespace PBQP { +namespace Heuristics { + +class Briggs { + public: + + class NodeData; + class EdgeData; + + private: + + typedef HeuristicSolverImpl<Briggs> Solver; + typedef HSITypes<NodeData, EdgeData> HSIT; + typedef HSIT::SolverGraph SolverGraph; + typedef HSIT::GraphNodeIterator GraphNodeIterator; + typedef HSIT::GraphEdgeIterator GraphEdgeIterator; + + class LinkDegreeComparator { + public: + LinkDegreeComparator() : g(0) {} + LinkDegreeComparator(SolverGraph *g) : g(g) {} + + bool operator()(const GraphNodeIterator &node1Itr, + const GraphNodeIterator &node2Itr) const { + assert((g != 0) && "Graph object not set, cannot access node data."); + unsigned n1Degree = g->getNodeData(node1Itr).getLinkDegree(), + n2Degree = g->getNodeData(node2Itr).getLinkDegree(); + if (n1Degree > n2Degree) { + return true; + } + else if (n1Degree < n2Degree) { + return false; + } + // else they're "equal" by degree, differentiate based on ID. 
+ return g->getNodeID(node1Itr) < g->getNodeID(node2Itr); + } + + private: + SolverGraph *g; + }; + + class SpillPriorityComparator { + public: + SpillPriorityComparator() : g(0) {} + SpillPriorityComparator(SolverGraph *g) : g(g) {} + + bool operator()(const GraphNodeIterator &node1Itr, + const GraphNodeIterator &node2Itr) const { + assert((g != 0) && "Graph object not set, cannot access node data."); + PBQPNum cost1 = + g->getNodeCosts(node1Itr)[0] / + g->getNodeData(node1Itr).getLinkDegree(), + cost2 = + g->getNodeCosts(node2Itr)[0] / + g->getNodeData(node2Itr).getLinkDegree(); + + if (cost1 < cost2) { + return true; + } + else if (cost1 > cost2) { + return false; + } + // else they'er "equal" again, differentiate based on address again. + return g->getNodeID(node1Itr) < g->getNodeID(node2Itr); + } + + private: + SolverGraph *g; + }; + + typedef std::set<GraphNodeIterator, LinkDegreeComparator> + RNAllocableNodeList; + typedef RNAllocableNodeList::iterator RNAllocableNodeListIterator; + + typedef std::set<GraphNodeIterator, SpillPriorityComparator> + RNUnallocableNodeList; + typedef RNUnallocableNodeList::iterator RNUnallocableNodeListIterator; + + public: + + class NodeData { + private: + RNAllocableNodeListIterator rNAllocableNodeListItr; + RNUnallocableNodeListIterator rNUnallocableNodeListItr; + unsigned numRegOptions, numDenied, numSafe; + std::vector<unsigned> unsafeDegrees; + bool allocable; + + void addRemoveLink(SolverGraph &g, const GraphNodeIterator &nodeItr, + const GraphEdgeIterator &edgeItr, bool add) { + + //assume we're adding... 
+ unsigned udTarget = 0, dir = 1; + + if (!add) { + udTarget = 1; + dir = ~0; + } + + EdgeData &linkEdgeData = g.getEdgeData(edgeItr).getHeuristicData(); + + EdgeData::ConstUnsafeIterator edgeUnsafeBegin, edgeUnsafeEnd; + + if (nodeItr == g.getEdgeNode1Itr(edgeItr)) { + numDenied += (dir * linkEdgeData.getWorstDegree()); + edgeUnsafeBegin = linkEdgeData.unsafeBegin(); + edgeUnsafeEnd = linkEdgeData.unsafeEnd(); + } + else { + numDenied += (dir * linkEdgeData.getReverseWorstDegree()); + edgeUnsafeBegin = linkEdgeData.reverseUnsafeBegin(); + edgeUnsafeEnd = linkEdgeData.reverseUnsafeEnd(); + } + + assert((unsafeDegrees.size() == + static_cast<unsigned>( + std::distance(edgeUnsafeBegin, edgeUnsafeEnd))) + && "Unsafe array size mismatch."); + + std::vector<unsigned>::iterator unsafeDegreesItr = + unsafeDegrees.begin(); + + for (EdgeData::ConstUnsafeIterator edgeUnsafeItr = edgeUnsafeBegin; + edgeUnsafeItr != edgeUnsafeEnd; + ++edgeUnsafeItr, ++unsafeDegreesItr) { + + if ((*edgeUnsafeItr == 1) && (*unsafeDegreesItr == udTarget)) { + numSafe -= dir; + } + *unsafeDegreesItr += (dir * (*edgeUnsafeItr)); + } + + allocable = (numDenied < numRegOptions) || (numSafe > 0); + } + + public: + + void setup(SolverGraph &g, const GraphNodeIterator &nodeItr) { + + numRegOptions = g.getNodeCosts(nodeItr).getLength() - 1; + + numSafe = numRegOptions; // Optimistic, correct below. + numDenied = 0; // Also optimistic. 
+ unsafeDegrees.resize(numRegOptions, 0); + + HSIT::NodeData &nodeData = g.getNodeData(nodeItr); + + for (HSIT::NodeData::AdjLinkIterator + adjLinkItr = nodeData.adjLinksBegin(), + adjLinkEnd = nodeData.adjLinksEnd(); + adjLinkItr != adjLinkEnd; ++adjLinkItr) { + + addRemoveLink(g, nodeItr, *adjLinkItr, true); + } + } + + bool isAllocable() const { return allocable; } + + void handleAddLink(SolverGraph &g, const GraphNodeIterator &nodeItr, + const GraphEdgeIterator &adjEdge) { + addRemoveLink(g, nodeItr, adjEdge, true); + } + + void handleRemoveLink(SolverGraph &g, const GraphNodeIterator &nodeItr, + const GraphEdgeIterator &adjEdge) { + addRemoveLink(g, nodeItr, adjEdge, false); + } + + void setRNAllocableNodeListItr( + const RNAllocableNodeListIterator &rNAllocableNodeListItr) { + + this->rNAllocableNodeListItr = rNAllocableNodeListItr; + } + + RNAllocableNodeListIterator getRNAllocableNodeListItr() const { + return rNAllocableNodeListItr; + } + + void setRNUnallocableNodeListItr( + const RNUnallocableNodeListIterator &rNUnallocableNodeListItr) { + + this->rNUnallocableNodeListItr = rNUnallocableNodeListItr; + } + + RNUnallocableNodeListIterator getRNUnallocableNodeListItr() const { + return rNUnallocableNodeListItr; + } + + + }; + + class EdgeData { + private: + + typedef std::vector<unsigned> UnsafeArray; + + unsigned worstDegree, + reverseWorstDegree; + UnsafeArray unsafe, reverseUnsafe; + + public: + + EdgeData() : worstDegree(0), reverseWorstDegree(0) {} + + typedef UnsafeArray::const_iterator ConstUnsafeIterator; + + void setup(SolverGraph &g, const GraphEdgeIterator &edgeItr) { + const Matrix &edgeCosts = g.getEdgeCosts(edgeItr); + unsigned numRegs = edgeCosts.getRows() - 1, + numReverseRegs = edgeCosts.getCols() - 1; + + unsafe.resize(numRegs, 0); + reverseUnsafe.resize(numReverseRegs, 0); + + std::vector<unsigned> rowInfCounts(numRegs, 0), + colInfCounts(numReverseRegs, 0); + + for (unsigned i = 0; i < numRegs; ++i) { + for (unsigned j = 0; j < 
numReverseRegs; ++j) { + if (edgeCosts[i + 1][j + 1] == + std::numeric_limits<PBQPNum>::infinity()) { + unsafe[i] = 1; + reverseUnsafe[j] = 1; + ++rowInfCounts[i]; + ++colInfCounts[j]; + + if (colInfCounts[j] > worstDegree) { + worstDegree = colInfCounts[j]; + } + + if (rowInfCounts[i] > reverseWorstDegree) { + reverseWorstDegree = rowInfCounts[i]; + } + } + } + } + } + + unsigned getWorstDegree() const { return worstDegree; } + unsigned getReverseWorstDegree() const { return reverseWorstDegree; } + ConstUnsafeIterator unsafeBegin() const { return unsafe.begin(); } + ConstUnsafeIterator unsafeEnd() const { return unsafe.end(); } + ConstUnsafeIterator reverseUnsafeBegin() const { + return reverseUnsafe.begin(); + } + ConstUnsafeIterator reverseUnsafeEnd() const { + return reverseUnsafe.end(); + } + }; + + void initialise(Solver &solver) { + this->s = &solver; + g = &s->getGraph(); + rNAllocableBucket = RNAllocableNodeList(LinkDegreeComparator(g)); + rNUnallocableBucket = + RNUnallocableNodeList(SpillPriorityComparator(g)); + + for (GraphEdgeIterator + edgeItr = g->edgesBegin(), edgeEnd = g->edgesEnd(); + edgeItr != edgeEnd; ++edgeItr) { + + g->getEdgeData(edgeItr).getHeuristicData().setup(*g, edgeItr); + } + + for (GraphNodeIterator + nodeItr = g->nodesBegin(), nodeEnd = g->nodesEnd(); + nodeItr != nodeEnd; ++nodeItr) { + + g->getNodeData(nodeItr).getHeuristicData().setup(*g, nodeItr); + } + } + + void addToRNBucket(const GraphNodeIterator &nodeItr) { + NodeData &nodeData = g->getNodeData(nodeItr).getHeuristicData(); + + if (nodeData.isAllocable()) { + nodeData.setRNAllocableNodeListItr( + rNAllocableBucket.insert(rNAllocableBucket.begin(), nodeItr)); + } + else { + nodeData.setRNUnallocableNodeListItr( + rNUnallocableBucket.insert(rNUnallocableBucket.begin(), nodeItr)); + } + } + + void removeFromRNBucket(const GraphNodeIterator &nodeItr) { + NodeData &nodeData = g->getNodeData(nodeItr).getHeuristicData(); + + if (nodeData.isAllocable()) { + 
rNAllocableBucket.erase(nodeData.getRNAllocableNodeListItr()); + } + else { + rNUnallocableBucket.erase(nodeData.getRNUnallocableNodeListItr()); + } + } + + void handleAddLink(const GraphEdgeIterator &edgeItr) { + // We assume that if we got here this edge is attached to at least + // one high degree node. + g->getEdgeData(edgeItr).getHeuristicData().setup(*g, edgeItr); + + GraphNodeIterator n1Itr = g->getEdgeNode1Itr(edgeItr), + n2Itr = g->getEdgeNode2Itr(edgeItr); + + HSIT::NodeData &n1Data = g->getNodeData(n1Itr), + &n2Data = g->getNodeData(n2Itr); + + if (n1Data.getLinkDegree() > 2) { + n1Data.getHeuristicData().handleAddLink(*g, n1Itr, edgeItr); + } + if (n2Data.getLinkDegree() > 2) { + n2Data.getHeuristicData().handleAddLink(*g, n2Itr, edgeItr); + } + } + + void handleRemoveLink(const GraphEdgeIterator &edgeItr, + const GraphNodeIterator &nodeItr) { + NodeData &nodeData = g->getNodeData(nodeItr).getHeuristicData(); + nodeData.handleRemoveLink(*g, nodeItr, edgeItr); + } + + void processRN() { + + if (!rNAllocableBucket.empty()) { + GraphNodeIterator selectedNodeItr = *rNAllocableBucket.begin(); + //std::cerr << "RN safely pushing " << g->getNodeID(selectedNodeItr) << "\n"; + rNAllocableBucket.erase(rNAllocableBucket.begin()); + s->pushStack(selectedNodeItr); + s->unlinkNode(selectedNodeItr); + } + else { + GraphNodeIterator selectedNodeItr = *rNUnallocableBucket.begin(); + //std::cerr << "RN optimistically pushing " << g->getNodeID(selectedNodeItr) << "\n"; + rNUnallocableBucket.erase(rNUnallocableBucket.begin()); + s->pushStack(selectedNodeItr); + s->unlinkNode(selectedNodeItr); + } + + } + + bool rNBucketEmpty() const { + return (rNAllocableBucket.empty() && rNUnallocableBucket.empty()); + } + +private: + + Solver *s; + SolverGraph *g; + RNAllocableNodeList rNAllocableBucket; + RNUnallocableNodeList rNUnallocableBucket; +}; + + + +} +} + + +#endif // LLVM_CODEGEN_PBQP_HEURISTICS_BRIGGS_H diff --git a/lib/CodeGen/PBQP/PBQPMath.h b/lib/CodeGen/PBQP/PBQPMath.h 
new file mode 100644 index 0000000..11f4b4b --- /dev/null +++ b/lib/CodeGen/PBQP/PBQPMath.h @@ -0,0 +1,288 @@ +//===-- PBQPMath.h - PBQP Vector and Matrix classes ------------*- C++ --*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_PBQP_PBQPMATH_H +#define LLVM_CODEGEN_PBQP_PBQPMATH_H + +#include <cassert> +#include <algorithm> +#include <functional> + +namespace PBQP { + +typedef double PBQPNum; + +/// \brief PBQP Vector class. +class Vector { + public: + + /// \brief Construct a PBQP vector of the given size. + explicit Vector(unsigned length) : + length(length), data(new PBQPNum[length]) { + } + + /// \brief Construct a PBQP vector with initializer. + Vector(unsigned length, PBQPNum initVal) : + length(length), data(new PBQPNum[length]) { + std::fill(data, data + length, initVal); + } + + /// \brief Copy construct a PBQP vector. + Vector(const Vector &v) : + length(v.length), data(new PBQPNum[length]) { + std::copy(v.data, v.data + length, data); + } + + /// \brief Destroy this vector, return its memory. + ~Vector() { delete[] data; } + + /// \brief Assignment operator. + Vector& operator=(const Vector &v) { + delete[] data; + length = v.length; + data = new PBQPNum[length]; + std::copy(v.data, v.data + length, data); + return *this; + } + + /// \brief Return the length of the vector + unsigned getLength() const { + return length; + } + + /// \brief Element access. + PBQPNum& operator[](unsigned index) { + assert(index < length && "Vector element access out of bounds."); + return data[index]; + } + + /// \brief Const element access. + const PBQPNum& operator[](unsigned index) const { + assert(index < length && "Vector element access out of bounds."); + return data[index]; + } + + /// \brief Add another vector to this one. 
+ Vector& operator+=(const Vector &v) { + assert(length == v.length && "Vector length mismatch."); + std::transform(data, data + length, v.data, data, std::plus<PBQPNum>()); + return *this; + } + + /// \brief Subtract another vector from this one. + Vector& operator-=(const Vector &v) { + assert(length == v.length && "Vector length mismatch."); + std::transform(data, data + length, v.data, data, std::minus<PBQPNum>()); + return *this; + } + + /// \brief Returns the index of the minimum value in this vector + unsigned minIndex() const { + return std::min_element(data, data + length) - data; + } + + private: + unsigned length; + PBQPNum *data; +}; + +/// \brief Output a textual representation of the given vector on the given +/// output stream. +template <typename OStream> +OStream& operator<<(OStream &os, const Vector &v) { + assert((v.getLength() != 0) && "Zero-length vector badness."); + + os << "[ " << v[0]; + for (unsigned i = 1; i < v.getLength(); ++i) { + os << ", " << v[i]; + } + os << " ]"; + + return os; +} + + +/// \brief PBQP Matrix class +class Matrix { + public: + + /// \brief Construct a PBQP Matrix with the given dimensions. + Matrix(unsigned rows, unsigned cols) : + rows(rows), cols(cols), data(new PBQPNum[rows * cols]) { + } + + /// \brief Construct a PBQP Matrix with the given dimensions and initial + /// value. + Matrix(unsigned rows, unsigned cols, PBQPNum initVal) : + rows(rows), cols(cols), data(new PBQPNum[rows * cols]) { + std::fill(data, data + (rows * cols), initVal); + } + + /// \brief Copy construct a PBQP matrix. + Matrix(const Matrix &m) : + rows(m.rows), cols(m.cols), data(new PBQPNum[rows * cols]) { + std::copy(m.data, m.data + (rows * cols), data); + } + + /// \brief Destroy this matrix, return its memory. + ~Matrix() { delete[] data; } + + /// \brief Assignment operator. 
+ Matrix& operator=(const Matrix &m) { + delete[] data; + rows = m.rows; cols = m.cols; + data = new PBQPNum[rows * cols]; + std::copy(m.data, m.data + (rows * cols), data); + return *this; + } + + /// \brief Return the number of rows in this matrix. + unsigned getRows() const { return rows; } + + /// \brief Return the number of cols in this matrix. + unsigned getCols() const { return cols; } + + /// \brief Matrix element access. + PBQPNum* operator[](unsigned r) { + assert(r < rows && "Row out of bounds."); + return data + (r * cols); + } + + /// \brief Matrix element access. + const PBQPNum* operator[](unsigned r) const { + assert(r < rows && "Row out of bounds."); + return data + (r * cols); + } + + /// \brief Returns the given row as a vector. + Vector getRowAsVector(unsigned r) const { + Vector v(cols); + for (unsigned c = 0; c < cols; ++c) + v[c] = (*this)[r][c]; + return v; + } + + /// \brief Returns the given column as a vector. + Vector getColAsVector(unsigned c) const { + Vector v(rows); + for (unsigned r = 0; r < rows; ++r) + v[r] = (*this)[r][c]; + return v; + } + + /// \brief Reset the matrix to the given value. + Matrix& reset(PBQPNum val = 0) { + std::fill(data, data + (rows * cols), val); + return *this; + } + + /// \brief Set a single row of this matrix to the given value. + Matrix& setRow(unsigned r, PBQPNum val) { + assert(r < rows && "Row out of bounds."); + std::fill(data + (r * cols), data + ((r + 1) * cols), val); + return *this; + } + + /// \brief Set a single column of this matrix to the given value. + Matrix& setCol(unsigned c, PBQPNum val) { + assert(c < cols && "Column out of bounds."); + for (unsigned r = 0; r < rows; ++r) + (*this)[r][c] = val; + return *this; + } + + /// \brief Matrix transpose. + Matrix transpose() const { + Matrix m(cols, rows); + for (unsigned r = 0; r < rows; ++r) + for (unsigned c = 0; c < cols; ++c) + m[c][r] = (*this)[r][c]; + return m; + } + + /// \brief Returns the diagonal of the matrix as a vector. 
+ /// + /// Matrix must be square. + Vector diagonalize() const { + assert(rows == cols && "Attempt to diagonalize non-square matrix."); + + Vector v(rows); + for (unsigned r = 0; r < rows; ++r) + v[r] = (*this)[r][r]; + return v; + } + + /// \brief Add the given matrix to this one. + Matrix& operator+=(const Matrix &m) { + assert(rows == m.rows && cols == m.cols && + "Matrix dimensions mismatch."); + std::transform(data, data + (rows * cols), m.data, data, + std::plus<PBQPNum>()); + return *this; + } + + /// \brief Returns the minimum of the given row + PBQPNum getRowMin(unsigned r) const { + assert(r < rows && "Row out of bounds"); + return *std::min_element(data + (r * cols), data + ((r + 1) * cols)); + } + + /// \brief Returns the minimum of the given column + PBQPNum getColMin(unsigned c) const { + PBQPNum minElem = (*this)[0][c]; + for (unsigned r = 1; r < rows; ++r) + if ((*this)[r][c] < minElem) minElem = (*this)[r][c]; + return minElem; + } + + /// \brief Subtracts the given scalar from the elements of the given row. + Matrix& subFromRow(unsigned r, PBQPNum val) { + assert(r < rows && "Row out of bounds"); + std::transform(data + (r * cols), data + ((r + 1) * cols), + data + (r * cols), + std::bind2nd(std::minus<PBQPNum>(), val)); + return *this; + } + + /// \brief Subtracts the given scalar from the elements of the given column. + Matrix& subFromCol(unsigned c, PBQPNum val) { + for (unsigned r = 0; r < rows; ++r) + (*this)[r][c] -= val; + return *this; + } + + /// \brief Returns true if this is a zero matrix. + bool isZero() const { + return find_if(data, data + (rows * cols), + std::bind2nd(std::not_equal_to<PBQPNum>(), 0)) == + data + (rows * cols); + } + + private: + unsigned rows, cols; + PBQPNum *data; +}; + +/// \brief Output a textual representation of the given matrix on the given +/// output stream. 
+template <typename OStream> +OStream& operator<<(OStream &os, const Matrix &m) { + + assert((m.getRows() != 0) && "Zero-row matrix badness."); + + for (unsigned i = 0; i < m.getRows(); ++i) { + os << m.getRowAsVector(i); + } + + return os; +} + +} + +#endif // LLVM_CODEGEN_PBQP_PBQPMATH_HPP diff --git a/lib/CodeGen/PBQP/SimpleGraph.h b/lib/CodeGen/PBQP/SimpleGraph.h new file mode 100644 index 0000000..1ca9cae --- /dev/null +++ b/lib/CodeGen/PBQP/SimpleGraph.h @@ -0,0 +1,100 @@ +//===-- SimpleGraph.h - Simple PBQP Graph ----------------------*- C++ --*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Simple PBQP graph class representing a PBQP problem. Graphs of this type +// can be passed to a PBQPSolver instance to solve the PBQP problem. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_PBQP_SIMPLEGRAPH_H +#define LLVM_CODEGEN_PBQP_SIMPLEGRAPH_H + +#include "GraphBase.h" + +namespace PBQP { + +class SimpleEdge; + +class SimpleNode : public NodeBase<SimpleNode, SimpleEdge> { +public: + SimpleNode(const Vector &costs) : + NodeBase<SimpleNode, SimpleEdge>(costs) {} +}; + +class SimpleEdge : public EdgeBase<SimpleNode, SimpleEdge> { +public: + SimpleEdge(const NodeIterator &node1Itr, const NodeIterator &node2Itr, + const Matrix &costs) : + EdgeBase<SimpleNode, SimpleEdge>(node1Itr, node2Itr, costs) {} +}; + +class SimpleGraph : public GraphBase<SimpleNode, SimpleEdge> { +private: + + typedef GraphBase<SimpleNode, SimpleEdge> PGraph; + + void copyFrom(const SimpleGraph &other) { + assert(other.areNodeIDsValid() && + "Cannot copy from another graph unless IDs have been assigned."); + + std::vector<NodeIterator> newNodeItrs(other.getNumNodes()); + + for (ConstNodeIterator nItr = other.nodesBegin(), 
nEnd = other.nodesEnd(); + nItr != nEnd; ++nItr) { + newNodeItrs[other.getNodeID(nItr)] = addNode(other.getNodeCosts(nItr)); + } + + for (ConstEdgeIterator eItr = other.edgesBegin(), eEnd = other.edgesEnd(); + eItr != eEnd; ++eItr) { + + unsigned node1ID = other.getNodeID(other.getEdgeNode1Itr(eItr)), + node2ID = other.getNodeID(other.getEdgeNode2Itr(eItr)); + + addEdge(newNodeItrs[node1ID], newNodeItrs[node2ID], + other.getEdgeCosts(eItr)); + } + } + + void copyFrom(SimpleGraph &other) { + if (!other.areNodeIDsValid()) { + other.assignNodeIDs(); + } + copyFrom(const_cast<const SimpleGraph&>(other)); + } + +public: + + SimpleGraph() {} + + + SimpleGraph(const SimpleGraph &other) : PGraph() { + copyFrom(other); + } + + SimpleGraph& operator=(const SimpleGraph &other) { + clear(); + copyFrom(other); + return *this; + } + + NodeIterator addNode(const Vector &costs) { + return PGraph::addConstructedNode(SimpleNode(costs)); + } + + EdgeIterator addEdge(const NodeIterator &node1Itr, + const NodeIterator &node2Itr, + const Matrix &costs) { + return PGraph::addConstructedEdge(SimpleEdge(node1Itr, node2Itr, costs)); + } + +}; + +} + +#endif // LLVM_CODEGEN_PBQP_SIMPLEGRAPH_H diff --git a/lib/CodeGen/PBQP/Solution.h b/lib/CodeGen/PBQP/Solution.h new file mode 100644 index 0000000..c91e2fa --- /dev/null +++ b/lib/CodeGen/PBQP/Solution.h @@ -0,0 +1,88 @@ +//===-- Solution.h ------- PBQP Solution -----------------------*- C++ --*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Annotated PBQP Graph class. This class is used internally by the PBQP solver +// to cache information to speed up reduction. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_PBQP_SOLUTION_H +#define LLVM_CODEGEN_PBQP_SOLUTION_H + +#include "PBQPMath.h" + +namespace PBQP { + +class Solution { + + friend class SolverImplementation; + +private: + + std::vector<unsigned> selections; + PBQPNum solutionCost; + bool provedOptimal; + unsigned r0Reductions, r1Reductions, + r2Reductions, rNReductions; + +public: + + Solution() : + solutionCost(0.0), provedOptimal(false), + r0Reductions(0), r1Reductions(0), r2Reductions(0), rNReductions(0) {} + + Solution(unsigned length, bool assumeOptimal) : + selections(length), solutionCost(0.0), provedOptimal(assumeOptimal), + r0Reductions(0), r1Reductions(0), r2Reductions(0), rNReductions(0) {} + + void setProvedOptimal(bool provedOptimal) { + this->provedOptimal = provedOptimal; + } + + void setSelection(unsigned nodeID, unsigned selection) { + selections[nodeID] = selection; + } + + void setSolutionCost(PBQPNum solutionCost) { + this->solutionCost = solutionCost; + } + + void incR0Reductions() { ++r0Reductions; } + void incR1Reductions() { ++r1Reductions; } + void incR2Reductions() { ++r2Reductions; } + void incRNReductions() { ++rNReductions; } + + unsigned numNodes() const { return selections.size(); } + + unsigned getSelection(unsigned nodeID) const { + return selections[nodeID]; + } + + PBQPNum getCost() const { return solutionCost; } + + bool isProvedOptimal() const { return provedOptimal; } + + unsigned getR0Reductions() const { return r0Reductions; } + unsigned getR1Reductions() const { return r1Reductions; } + unsigned getR2Reductions() const { return r2Reductions; } + unsigned getRNReductions() const { return rNReductions; } + + bool operator==(const Solution &other) const { + return (selections == other.selections); + } + + bool operator!=(const Solution &other) const { + return !(*this == other); + } + +}; + +} + +#endif // LLVM_CODEGEN_PBQP_SOLUTION_H diff --git 
a/lib/CodeGen/PBQP/Solver.h b/lib/CodeGen/PBQP/Solver.h new file mode 100644 index 0000000..a9c5f83 --- /dev/null +++ b/lib/CodeGen/PBQP/Solver.h @@ -0,0 +1,31 @@ +//===-- Solver.h ------- PBQP solver interface -----------------*- C++ --*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + + +#ifndef LLVM_CODEGEN_PBQP_SOLVER_H +#define LLVM_CODEGEN_PBQP_SOLVER_H + +#include "SimpleGraph.h" +#include "Solution.h" + +namespace PBQP { + +/// \brief Interface for solver classes. +class Solver { +public: + + virtual ~Solver() = 0; + virtual Solution solve(const SimpleGraph &orig) const = 0; +}; + +Solver::~Solver() {} + +} + +#endif // LLVM_CODEGEN_PBQP_SOLVER_H diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp index c5c76fc..8071b0a 100644 --- a/lib/CodeGen/PHIElimination.cpp +++ b/lib/CodeGen/PHIElimination.cpp @@ -14,6 +14,7 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "phielim" +#include "PHIElimination.h" #include "llvm/BasicBlock.h" #include "llvm/Instructions.h" #include "llvm/CodeGen/LiveVariables.h" @@ -22,7 +23,6 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/STLExtras.h" @@ -34,78 +34,25 @@ using namespace llvm; STATISTIC(NumAtomic, "Number of atomic phis lowered"); -namespace { - class VISIBILITY_HIDDEN PNE : public MachineFunctionPass { - MachineRegisterInfo *MRI; // Machine register information - - public: - static char ID; // Pass identification, replacement for typeid - PNE() : MachineFunctionPass(&ID) {} - - virtual bool 
runOnMachineFunction(MachineFunction &Fn); - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.addPreserved<LiveVariables>(); - AU.addPreservedID(MachineLoopInfoID); - AU.addPreservedID(MachineDominatorsID); - MachineFunctionPass::getAnalysisUsage(AU); - } - - private: - /// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions - /// in predecessor basic blocks. - /// - bool EliminatePHINodes(MachineFunction &MF, MachineBasicBlock &MBB); - void LowerAtomicPHINode(MachineBasicBlock &MBB, - MachineBasicBlock::iterator AfterPHIsIt); - - /// analyzePHINodes - Gather information about the PHI nodes in - /// here. In particular, we want to map the number of uses of a virtual - /// register which is used in a PHI node. We map that to the BB the - /// vreg is coming from. This is used later to determine when the vreg - /// is killed in the BB. - /// - void analyzePHINodes(const MachineFunction& Fn); - - // FindCopyInsertPoint - Find a safe place in MBB to insert a copy from - // SrcReg. This needs to be after any def or uses of SrcReg, but before - // any subsequent point where control flow might jump out of the basic - // block. - MachineBasicBlock::iterator FindCopyInsertPoint(MachineBasicBlock &MBB, - unsigned SrcReg); - - // SkipPHIsAndLabels - Copies need to be inserted after phi nodes and - // also after any exception handling labels: in landing pads execution - // starts at the label, so any copies placed before it won't be executed! - MachineBasicBlock::iterator SkipPHIsAndLabels(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) { - // Rather than assuming that EH labels come before other kinds of labels, - // just skip all labels. 
- while (I != MBB.end() && - (I->getOpcode() == TargetInstrInfo::PHI || I->isLabel())) - ++I; - return I; - } - - typedef std::pair<const MachineBasicBlock*, unsigned> BBVRegPair; - typedef std::map<BBVRegPair, unsigned> VRegPHIUse; - - VRegPHIUse VRegPHIUseCount; - - // Defs of PHI sources which are implicit_def. - SmallPtrSet<MachineInstr*, 4> ImpDefs; - }; -} - -char PNE::ID = 0; -static RegisterPass<PNE> +char PHIElimination::ID = 0; +static RegisterPass<PHIElimination> X("phi-node-elimination", "Eliminate PHI nodes for register allocation"); const PassInfo *const llvm::PHIEliminationID = &X; -bool PNE::runOnMachineFunction(MachineFunction &Fn) { +void llvm::PHIElimination::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addPreserved<LiveVariables>(); + AU.addPreservedID(MachineLoopInfoID); + AU.addPreservedID(MachineDominatorsID); + MachineFunctionPass::getAnalysisUsage(AU); +} + +bool llvm::PHIElimination::runOnMachineFunction(MachineFunction &Fn) { MRI = &Fn.getRegInfo(); + PHIDefs.clear(); + PHIKills.clear(); analyzePHINodes(Fn); bool Changed = false; @@ -132,7 +79,8 @@ bool PNE::runOnMachineFunction(MachineFunction &Fn) { /// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions in /// predecessor basic blocks. /// -bool PNE::EliminatePHINodes(MachineFunction &MF, MachineBasicBlock &MBB) { +bool llvm::PHIElimination::EliminatePHINodes(MachineFunction &MF, + MachineBasicBlock &MBB) { if (MBB.empty() || MBB.front().getOpcode() != TargetInstrInfo::PHI) return false; // Quick exit for basic blocks without PHIs. @@ -162,8 +110,9 @@ static bool isSourceDefinedByImplicitDef(const MachineInstr *MPhi, // FindCopyInsertPoint - Find a safe place in MBB to insert a copy from SrcReg. // This needs to be after any def or uses of SrcReg, but before any subsequent // point where control flow might jump out of the basic block. 
-MachineBasicBlock::iterator PNE::FindCopyInsertPoint(MachineBasicBlock &MBB, - unsigned SrcReg) { +MachineBasicBlock::iterator +llvm::PHIElimination::FindCopyInsertPoint(MachineBasicBlock &MBB, + unsigned SrcReg) { // Handle the trivial case trivially. if (MBB.empty()) return MBB.begin(); @@ -206,9 +155,10 @@ MachineBasicBlock::iterator PNE::FindCopyInsertPoint(MachineBasicBlock &MBB, /// under the assuption that it needs to be lowered in a way that supports /// atomic execution of PHIs. This lowering method is always correct all of the /// time. -/// -void PNE::LowerAtomicPHINode(MachineBasicBlock &MBB, - MachineBasicBlock::iterator AfterPHIsIt) { +/// +void llvm::PHIElimination::LowerAtomicPHINode( + MachineBasicBlock &MBB, + MachineBasicBlock::iterator AfterPHIsIt) { // Unlink the PHI node from the basic block, but don't delete the PHI yet. MachineInstr *MPhi = MBB.remove(MBB.begin()); @@ -235,6 +185,10 @@ void PNE::LowerAtomicPHINode(MachineBasicBlock &MBB, TII->copyRegToReg(MBB, AfterPHIsIt, DestReg, IncomingReg, RC, RC); } + // Record PHI def. + assert(!hasPHIDef(DestReg) && "Vreg has multiple phi-defs?"); + PHIDefs[DestReg] = &MBB; + // Update live variable information if there is any. LiveVariables *LV = getAnalysisIfAvailable<LiveVariables>(); if (LV) { @@ -276,6 +230,13 @@ void PNE::LowerAtomicPHINode(MachineBasicBlock &MBB, assert(TargetRegisterInfo::isVirtualRegister(SrcReg) && "Machine PHI Operands must all be virtual registers!"); + // Get the MachineBasicBlock equivalent of the BasicBlock that is the source + // path the PHI. + MachineBasicBlock &opBlock = *MPhi->getOperand(i*2+2).getMBB(); + + // Record the kill. + PHIKills[SrcReg].insert(&opBlock); + // If source is defined by an implicit def, there is no need to insert a // copy. 
MachineInstr *DefMI = MRI->getVRegDef(SrcReg); @@ -284,10 +245,6 @@ void PNE::LowerAtomicPHINode(MachineBasicBlock &MBB, continue; } - // Get the MachineBasicBlock equivalent of the BasicBlock that is the source - // path the PHI. - MachineBasicBlock &opBlock = *MPhi->getOperand(i*2+2).getMBB(); - // Check to make sure we haven't already emitted the copy for this block. // This can happen because PHI nodes may have multiple entries for the same // basic block. @@ -420,7 +377,7 @@ void PNE::LowerAtomicPHINode(MachineBasicBlock &MBB, /// used in a PHI node. We map that to the BB the vreg is coming from. This is /// used later to determine when the vreg is killed in the BB. /// -void PNE::analyzePHINodes(const MachineFunction& Fn) { +void llvm::PHIElimination::analyzePHINodes(const MachineFunction& Fn) { for (MachineFunction::const_iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) for (MachineBasicBlock::const_iterator BBI = I->begin(), BBE = I->end(); diff --git a/lib/CodeGen/PHIElimination.h b/lib/CodeGen/PHIElimination.h new file mode 100644 index 0000000..3d02dfd --- /dev/null +++ b/lib/CodeGen/PHIElimination.h @@ -0,0 +1,125 @@ +//===-- lib/CodeGen/PHIElimination.h ----------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_PHIELIMINATION_HPP +#define LLVM_CODEGEN_PHIELIMINATION_HPP + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/Target/TargetInstrInfo.h" + +#include <map> + +namespace llvm { + + /// Lower PHI instructions to copies. 
+ class PHIElimination : public MachineFunctionPass { + MachineRegisterInfo *MRI; // Machine register information + private: + + typedef SmallSet<MachineBasicBlock*, 4> PHIKillList; + typedef DenseMap<unsigned, PHIKillList> PHIKillMap; + typedef DenseMap<unsigned, MachineBasicBlock*> PHIDefMap; + + public: + + typedef PHIKillList::iterator phi_kill_iterator; + typedef PHIKillList::const_iterator const_phi_kill_iterator; + + static char ID; // Pass identification, replacement for typeid + PHIElimination() : MachineFunctionPass(&ID) {} + + virtual bool runOnMachineFunction(MachineFunction &Fn); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + + /// Return true if the given vreg was defined by a PHI intsr prior to + /// lowering. + bool hasPHIDef(unsigned vreg) const { + return PHIDefs.count(vreg); + } + + /// Returns the block in which the PHI instruction which defined the + /// given vreg used to reside. + MachineBasicBlock* getPHIDefBlock(unsigned vreg) { + PHIDefMap::iterator phiDefItr = PHIDefs.find(vreg); + assert(phiDefItr != PHIDefs.end() && "vreg has no phi-def."); + return phiDefItr->second; + } + + /// Returns true if the given vreg was killed by a PHI instr. + bool hasPHIKills(unsigned vreg) const { + return PHIKills.count(vreg); + } + + /// Returns an iterator over the BasicBlocks which contained PHI + /// kills of this register prior to lowering. + phi_kill_iterator phiKillsBegin(unsigned vreg) { + PHIKillMap::iterator phiKillItr = PHIKills.find(vreg); + assert(phiKillItr != PHIKills.end() && "vreg has no phi-kills."); + return phiKillItr->second.begin(); + } + phi_kill_iterator phiKillsEnd(unsigned vreg) { + PHIKillMap::iterator phiKillItr = PHIKills.find(vreg); + assert(phiKillItr != PHIKills.end() && "vreg has no phi-kills."); + return phiKillItr->second.end(); + } + + private: + /// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions + /// in predecessor basic blocks. 
+ /// + bool EliminatePHINodes(MachineFunction &MF, MachineBasicBlock &MBB); + void LowerAtomicPHINode(MachineBasicBlock &MBB, + MachineBasicBlock::iterator AfterPHIsIt); + + /// analyzePHINodes - Gather information about the PHI nodes in + /// here. In particular, we want to map the number of uses of a virtual + /// register which is used in a PHI node. We map that to the BB the + /// vreg is coming from. This is used later to determine when the vreg + /// is killed in the BB. + /// + void analyzePHINodes(const MachineFunction& Fn); + + // FindCopyInsertPoint - Find a safe place in MBB to insert a copy from + // SrcReg. This needs to be after any def or uses of SrcReg, but before + // any subsequent point where control flow might jump out of the basic + // block. + MachineBasicBlock::iterator FindCopyInsertPoint(MachineBasicBlock &MBB, + unsigned SrcReg); + + // SkipPHIsAndLabels - Copies need to be inserted after phi nodes and + // also after any exception handling labels: in landing pads execution + // starts at the label, so any copies placed before it won't be executed! + MachineBasicBlock::iterator SkipPHIsAndLabels(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) { + // Rather than assuming that EH labels come before other kinds of labels, + // just skip all labels. + while (I != MBB.end() && + (I->getOpcode() == TargetInstrInfo::PHI || I->isLabel())) + ++I; + return I; + } + + typedef std::pair<const MachineBasicBlock*, unsigned> BBVRegPair; + typedef std::map<BBVRegPair, unsigned> VRegPHIUse; + + VRegPHIUse VRegPHIUseCount; + PHIDefMap PHIDefs; + PHIKillMap PHIKills; + + // Defs of PHI sources which are implicit_def. 
+ SmallPtrSet<MachineInstr*, 4> ImpDefs; + }; + +} + +#endif /* LLVM_CODEGEN_PHIELIMINATION_HPP */ diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp index de774685..e52158c 100644 --- a/lib/CodeGen/PostRASchedulerList.cpp +++ b/lib/CodeGen/PostRASchedulerList.cpp @@ -19,45 +19,73 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "post-RA-sched" +#include "ExactHazardRecognizer.h" +#include "SimpleHazardRecognizer.h" #include "ScheduleDAGInstrs.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/LatencyPriorityQueue.h" #include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtarget.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/ADT/Statistic.h" #include <map> +#include <set> using namespace llvm; STATISTIC(NumNoops, "Number of noops inserted"); STATISTIC(NumStalls, "Number of pipeline stalls"); +// Post-RA scheduling is enabled with +// TargetSubtarget.enablePostRAScheduler(). This flag can be used to +// override the target. 
+static cl::opt<bool> +EnablePostRAScheduler("post-RA-scheduler", + cl::desc("Enable scheduling after register allocation"), + cl::init(false), cl::Hidden); static cl::opt<bool> EnableAntiDepBreaking("break-anti-dependencies", cl::desc("Break post-RA scheduling anti-dependencies"), cl::init(true), cl::Hidden); - static cl::opt<bool> EnablePostRAHazardAvoidance("avoid-hazards", - cl::desc("Enable simple hazard-avoidance"), + cl::desc("Enable exact hazard avoidance"), cl::init(true), cl::Hidden); +// If DebugDiv > 0 then only schedule MBB with (ID % DebugDiv) == DebugMod +static cl::opt<int> +DebugDiv("postra-sched-debugdiv", + cl::desc("Debug control MBBs that are scheduled"), + cl::init(0), cl::Hidden); +static cl::opt<int> +DebugMod("postra-sched-debugmod", + cl::desc("Debug control MBBs that are scheduled"), + cl::init(0), cl::Hidden); + namespace { class VISIBILITY_HIDDEN PostRAScheduler : public MachineFunctionPass { + AliasAnalysis *AA; + public: static char ID; PostRAScheduler() : MachineFunctionPass(&ID) {} void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequired<AliasAnalysis>(); AU.addRequired<MachineDominatorTree>(); AU.addPreserved<MachineDominatorTree>(); AU.addRequired<MachineLoopInfo>(); @@ -95,6 +123,9 @@ namespace { /// HazardRec - The hazard recognizer to use. ScheduleHazardRecognizer *HazardRec; + /// AA - AliasAnalysis for making memory reference queries. + AliasAnalysis *AA; + /// Classes - For live regs that are only used in one register class in a /// live range, the register class. If the register is not live, the /// corresponding value is null. If the register is live but used in @@ -106,22 +137,27 @@ namespace { /// RegRegs - Map registers to all their references within a live range. std::multimap<unsigned, MachineOperand *> RegRefs; - /// The index of the most recent kill (proceding bottom-up), or ~0u if - /// the register is not live. 
+ /// KillIndices - The index of the most recent kill (proceding bottom-up), + /// or ~0u if the register is not live. unsigned KillIndices[TargetRegisterInfo::FirstVirtualRegister]; - /// The index of the most recent complete def (proceding bottom up), or ~0u - /// if the register is live. + /// DefIndices - The index of the most recent complete def (proceding bottom + /// up), or ~0u if the register is live. unsigned DefIndices[TargetRegisterInfo::FirstVirtualRegister]; + /// KeepRegs - A set of registers which are live and cannot be changed to + /// break anti-dependencies. + SmallSet<unsigned, 4> KeepRegs; + public: SchedulePostRATDList(MachineFunction &MF, const MachineLoopInfo &MLI, const MachineDominatorTree &MDT, - ScheduleHazardRecognizer *HR) + ScheduleHazardRecognizer *HR, + AliasAnalysis *aa) : ScheduleDAGInstrs(MF, MLI, MDT), Topo(SUnits), AllocatableSet(TRI->getAllocatableSet(MF)), - HazardRec(HR) {} + HazardRec(HR), AA(aa) {} ~SchedulePostRATDList() { delete HazardRec; @@ -135,6 +171,11 @@ namespace { /// Schedule - Schedule the instruction range using list scheduling. /// void Schedule(); + + /// FixupKills - Fix register kill flags that have been made + /// invalid due to scheduling + /// + void FixupKills(MachineBasicBlock *MBB); /// Observe - Update liveness information to account for the current /// instruction, which will not be scheduled. @@ -153,62 +194,15 @@ namespace { void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle); void ListScheduleTopDown(); bool BreakAntiDependencies(); - }; - - /// SimpleHazardRecognizer - A *very* simple hazard recognizer. It uses - /// a coarse classification and attempts to avoid that instructions of - /// a given class aren't grouped too densely together. - class SimpleHazardRecognizer : public ScheduleHazardRecognizer { - /// Class - A simple classification for SUnits. - enum Class { - Other, Load, Store - }; - - /// Window - The Class values of the most recently issued - /// instructions. 
- Class Window[8]; - - /// getClass - Classify the given SUnit. - Class getClass(const SUnit *SU) { - const MachineInstr *MI = SU->getInstr(); - const TargetInstrDesc &TID = MI->getDesc(); - if (TID.mayLoad()) - return Load; - if (TID.mayStore()) - return Store; - return Other; - } - - /// Step - Rotate the existing entries in Window and insert the - /// given class value in position as the most recent. - void Step(Class C) { - std::copy(Window+1, array_endof(Window), Window); - Window[array_lengthof(Window)-1] = C; - } - - public: - SimpleHazardRecognizer() : Window() {} - - virtual HazardType getHazardType(SUnit *SU) { - Class C = getClass(SU); - if (C == Other) - return NoHazard; - unsigned Score = 0; - for (unsigned i = 0; i != array_lengthof(Window); ++i) - if (Window[i] == C) - Score += i + 1; - if (Score > array_lengthof(Window) * 2) - return Hazard; - return NoHazard; - } - - virtual void EmitInstruction(SUnit *SU) { - Step(getClass(SU)); - } - - virtual void AdvanceCycle() { - Step(Other); - } + unsigned findSuitableFreeRegister(unsigned AntiDepReg, + unsigned LastNewReg, + const TargetRegisterClass *); + void StartBlockForKills(MachineBasicBlock *BB); + + // ToggleKillFlag - Toggle a register operand kill flag. Other + // adjustments may be made to the instruction if necessary. Return + // true if the operand has been deleted, false if not. + bool ToggleKillFlag(MachineInstr *MI, MachineOperand &MO); }; } @@ -235,19 +229,44 @@ static bool isSchedulingBoundary(const MachineInstr *MI, } bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { - DOUT << "PostRAScheduler\n"; + AA = &getAnalysis<AliasAnalysis>(); + + // Check for explicit enable/disable of post-ra scheduling. 
+ if (EnablePostRAScheduler.getPosition() > 0) { + if (!EnablePostRAScheduler) + return true; + } else { + // Check that post-RA scheduling is enabled for this function + const TargetSubtarget &ST = Fn.getTarget().getSubtarget<TargetSubtarget>(); + if (!ST.enablePostRAScheduler()) + return true; + } + + DEBUG(errs() << "PostRAScheduler\n"); const MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>(); const MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>(); + const InstrItineraryData &InstrItins = Fn.getTarget().getInstrItineraryData(); ScheduleHazardRecognizer *HR = EnablePostRAHazardAvoidance ? - new SimpleHazardRecognizer : - new ScheduleHazardRecognizer(); + (ScheduleHazardRecognizer *)new ExactHazardRecognizer(InstrItins) : + (ScheduleHazardRecognizer *)new SimpleHazardRecognizer(); - SchedulePostRATDList Scheduler(Fn, MLI, MDT, HR); + SchedulePostRATDList Scheduler(Fn, MLI, MDT, HR, AA); // Loop over all of the basic blocks for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end(); MBB != MBBe; ++MBB) { +#ifndef NDEBUG + // If DebugDiv > 0 then only schedule MBB with (ID % DebugDiv) == DebugMod + if (DebugDiv > 0) { + static int bbcnt = 0; + if (bbcnt++ % DebugDiv != DebugMod) + continue; + errs() << "*** DEBUG scheduling " << Fn.getFunction()->getNameStr() << + ":MBB ID#" << MBB->getNumber() << " ***\n"; + } +#endif + // Initialize register live-range state for scheduling in this block. 
Scheduler.StartBlock(MBB); @@ -259,7 +278,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { MachineInstr *MI = prior(I); if (isSchedulingBoundary(MI, Fn)) { Scheduler.Run(MBB, I, Current, CurrentCount); - Scheduler.EmitSchedule(); + Scheduler.EmitSchedule(0); Current = MI; CurrentCount = Count - 1; Scheduler.Observe(MI, CurrentCount); @@ -271,10 +290,13 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { assert((MBB->begin() == Current || CurrentCount != 0) && "Instruction count mismatch!"); Scheduler.Run(MBB, MBB->begin(), Current, CurrentCount); - Scheduler.EmitSchedule(); + Scheduler.EmitSchedule(0); // Clean up register live-range state. Scheduler.FinishBlock(); + + // Update register kills + Scheduler.FixupKills(MBB); } return true; @@ -287,6 +309,9 @@ void SchedulePostRATDList::StartBlock(MachineBasicBlock *BB) { // Call the superclass. ScheduleDAGInstrs::StartBlock(BB); + // Reset the hazard recognizer. + HazardRec->Reset(); + // Clear out the register class data. std::fill(Classes, array_endof(Classes), static_cast<const TargetRegisterClass *>(0)); @@ -295,8 +320,13 @@ void SchedulePostRATDList::StartBlock(MachineBasicBlock *BB) { std::fill(KillIndices, array_endof(KillIndices), ~0u); std::fill(DefIndices, array_endof(DefIndices), BB->size()); + // Clear "do not change" set. + KeepRegs.clear(); + + bool IsReturnBlock = (!BB->empty() && BB->back().getDesc().isReturn()); + // Determine the live-out physregs for this block. - if (!BB->empty() && BB->back().getDesc().isReturn()) + if (IsReturnBlock) { // In a return block, examine the function live-out regs. for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(), E = MRI.liveout_end(); I != E; ++I) { @@ -312,7 +342,7 @@ void SchedulePostRATDList::StartBlock(MachineBasicBlock *BB) { DefIndices[AliasReg] = ~0u; } } - else + } else { // In a non-return block, examine the live-in regs of all successors. 
for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), SE = BB->succ_end(); SI != SE; ++SI) @@ -330,18 +360,16 @@ void SchedulePostRATDList::StartBlock(MachineBasicBlock *BB) { DefIndices[AliasReg] = ~0u; } } + } - // Consider callee-saved registers as live-out, since we're running after - // prologue/epilogue insertion so there's no way to add additional - // saved registers. - // - // TODO: If the callee saves and restores these, then we can potentially - // use them between the save and the restore. To do that, we could scan - // the exit blocks to see which of these registers are defined. - // Alternatively, callee-saved registers that aren't saved and restored - // could be marked live-in in every block. + // Mark live-out callee-saved registers. In a return block this is + // all callee-saved registers. In non-return this is any + // callee-saved register that is not saved in the prolog. + const MachineFrameInfo *MFI = MF.getFrameInfo(); + BitVector Pristine = MFI->getPristineRegs(BB); for (const unsigned *I = TRI->getCalleeSavedRegs(); *I; ++I) { unsigned Reg = *I; + if (!IsReturnBlock && !Pristine.test(Reg)) continue; Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1); KillIndices[Reg] = BB->size(); DefIndices[Reg] = ~0u; @@ -358,10 +386,10 @@ void SchedulePostRATDList::StartBlock(MachineBasicBlock *BB) { /// Schedule - Schedule the instruction range using list scheduling. /// void SchedulePostRATDList::Schedule() { - DOUT << "********** List Scheduling **********\n"; + DEBUG(errs() << "********** List Scheduling **********\n"); // Build the scheduling graph. 
- BuildSchedGraph(); + BuildSchedGraph(AA); if (EnableAntiDepBreaking) { if (BreakAntiDependencies()) { @@ -374,10 +402,13 @@ void SchedulePostRATDList::Schedule() { SUnits.clear(); EntrySU = SUnit(); ExitSU = SUnit(); - BuildSchedGraph(); + BuildSchedGraph(AA); } } + DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) + SUnits[su].dumpAll(this)); + AvailableQueue.initNodes(SUnits); ListScheduleTopDown(); @@ -448,8 +479,10 @@ void SchedulePostRATDList::PrescanInstruction(MachineInstr *MI) { if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); if (Reg == 0) continue; - const TargetRegisterClass *NewRC = - getInstrOperandRegClass(TRI, MI->getDesc(), i); + const TargetRegisterClass *NewRC = 0; + + if (i < MI->getDesc().getNumOperands()) + NewRC = MI->getDesc().OpInfo[i].getRegClass(TRI); // For now, only allow the register to be changed if its register // class is consistent across all uses. @@ -473,6 +506,16 @@ void SchedulePostRATDList::PrescanInstruction(MachineInstr *MI) { // If we're still willing to consider this register, note the reference. if (Classes[Reg] != reinterpret_cast<TargetRegisterClass *>(-1)) RegRefs.insert(std::make_pair(Reg, &MO)); + + // It's not safe to change register allocation for source operands of + // that have special allocation requirements. + if (MO.isUse() && MI->getDesc().hasExtraSrcRegAllocReq()) { + if (KeepRegs.insert(Reg)) { + for (const unsigned *Subreg = TRI->getSubRegisters(Reg); + *Subreg; ++Subreg) + KeepRegs.insert(*Subreg); + } + } } } @@ -492,9 +535,10 @@ void SchedulePostRATDList::ScanInstruction(MachineInstr *MI, DefIndices[Reg] = Count; KillIndices[Reg] = ~0u; - assert(((KillIndices[Reg] == ~0u) != - (DefIndices[Reg] == ~0u)) && - "Kill and Def maps aren't consistent for Reg!"); + assert(((KillIndices[Reg] == ~0u) != + (DefIndices[Reg] == ~0u)) && + "Kill and Def maps aren't consistent for Reg!"); + KeepRegs.erase(Reg); Classes[Reg] = 0; RegRefs.erase(Reg); // Repeat, for all subregs. 
@@ -503,6 +547,7 @@ void SchedulePostRATDList::ScanInstruction(MachineInstr *MI, unsigned SubregReg = *Subreg; DefIndices[SubregReg] = Count; KillIndices[SubregReg] = ~0u; + KeepRegs.erase(SubregReg); Classes[SubregReg] = 0; RegRefs.erase(SubregReg); } @@ -520,8 +565,9 @@ void SchedulePostRATDList::ScanInstruction(MachineInstr *MI, if (Reg == 0) continue; if (!MO.isUse()) continue; - const TargetRegisterClass *NewRC = - getInstrOperandRegClass(TRI, MI->getDesc(), i); + const TargetRegisterClass *NewRC = 0; + if (i < MI->getDesc().getNumOperands()) + NewRC = MI->getDesc().OpInfo[i].getRegClass(TRI); // For now, only allow the register to be changed if its register // class is consistent across all uses. @@ -551,6 +597,36 @@ void SchedulePostRATDList::ScanInstruction(MachineInstr *MI, } } +unsigned +SchedulePostRATDList::findSuitableFreeRegister(unsigned AntiDepReg, + unsigned LastNewReg, + const TargetRegisterClass *RC) { + for (TargetRegisterClass::iterator R = RC->allocation_order_begin(MF), + RE = RC->allocation_order_end(MF); R != RE; ++R) { + unsigned NewReg = *R; + // Don't replace a register with itself. + if (NewReg == AntiDepReg) continue; + // Don't replace a register with one that was recently used to repair + // an anti-dependence with this AntiDepReg, because that would + // re-introduce that anti-dependence. + if (NewReg == LastNewReg) continue; + // If NewReg is dead and NewReg's most recent def is not before + // AntiDepReg's kill, it's safe to replace AntiDepReg with NewReg. 
+ assert(((KillIndices[AntiDepReg] == ~0u) != (DefIndices[AntiDepReg] == ~0u)) && + "Kill and Def maps aren't consistent for AntiDepReg!"); + assert(((KillIndices[NewReg] == ~0u) != (DefIndices[NewReg] == ~0u)) && + "Kill and Def maps aren't consistent for NewReg!"); + if (KillIndices[NewReg] != ~0u || + Classes[NewReg] == reinterpret_cast<TargetRegisterClass *>(-1) || + KillIndices[AntiDepReg] > DefIndices[NewReg]) + continue; + return NewReg; + } + + // No registers are free and available! + return 0; +} + /// BreakAntiDependencies - Identifiy anti-dependencies along the critical path /// of the ScheduleDAG and break them by renaming registers. /// @@ -567,8 +643,18 @@ bool SchedulePostRATDList::BreakAntiDependencies() { Max = SU; } - DOUT << "Critical path has total latency " - << (Max->getDepth() + Max->Latency) << "\n"; +#ifndef NDEBUG + { + DEBUG(errs() << "Critical path has total latency " + << (Max->getDepth() + Max->Latency) << "\n"); + DEBUG(errs() << "Available regs:"); + for (unsigned Reg = 0; Reg < TRI->getNumRegs(); ++Reg) { + if (KillIndices[Reg] == ~0u) + DEBUG(errs() << " " << TRI->getName(Reg)); + } + DEBUG(errs() << '\n'); + } +#endif // Track progress along the critical path through the SUnit graph as we walk // the instructions. @@ -598,7 +684,7 @@ bool SchedulePostRATDList::BreakAntiDependencies() { // isn't A which is free. This re-introduces anti-dependencies // at all but one of the original anti-dependencies that we were // trying to break. To avoid this, keep track of the most recent - // register that each register was replaced with, avoid avoid + // register that each register was replaced with, avoid // using it to repair an anti-dependence on the same register. // This lets us produce this: // A = ... @@ -627,13 +713,6 @@ bool SchedulePostRATDList::BreakAntiDependencies() { I != E; --Count) { MachineInstr *MI = --I; - // After regalloc, IMPLICIT_DEF instructions aren't safe to treat as - // dependence-breaking. 
In the case of an INSERT_SUBREG, the IMPLICIT_DEF - // is left behind appearing to clobber the super-register, while the - // subregister needs to remain live. So we just ignore them. - if (MI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF) - continue; - // Check if this instruction has a dependence on the critical path that // is an anti-dependence that we may be able to break. If it is, set // AntiDepReg to the non-zero register associated with the anti-dependence. @@ -656,8 +735,12 @@ bool SchedulePostRATDList::BreakAntiDependencies() { if (Edge->getKind() == SDep::Anti) { AntiDepReg = Edge->getReg(); assert(AntiDepReg != 0 && "Anti-dependence on reg0?"); - // Don't break anti-dependencies on non-allocatable registers. if (!AllocatableSet.test(AntiDepReg)) + // Don't break anti-dependencies on non-allocatable registers. + AntiDepReg = 0; + else if (KeepRegs.count(AntiDepReg)) + // Don't break anti-dependencies if an use down below requires + // this exact register. AntiDepReg = 0; else { // If the SUnit has other dependencies on the SUnit that it @@ -689,16 +772,22 @@ bool SchedulePostRATDList::BreakAntiDependencies() { PrescanInstruction(MI); - // If this instruction has a use of AntiDepReg, breaking it - // is invalid. - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); - if (Reg == 0) continue; - if (MO.isUse() && AntiDepReg == Reg) { - AntiDepReg = 0; - break; + if (MI->getDesc().hasExtraDefRegAllocReq()) + // If this instruction's defs have special allocation requirement, don't + // break this anti-dependency. + AntiDepReg = 0; + else if (AntiDepReg) { + // If this instruction has a use of AntiDepReg, breaking it + // is invalid. 
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (Reg == 0) continue; + if (MO.isUse() && AntiDepReg == Reg) { + AntiDepReg = 0; + break; + } } } @@ -715,60 +804,43 @@ bool SchedulePostRATDList::BreakAntiDependencies() { // TODO: Instead of picking the first free register, consider which might // be the best. if (AntiDepReg != 0) { - for (TargetRegisterClass::iterator R = RC->allocation_order_begin(MF), - RE = RC->allocation_order_end(MF); R != RE; ++R) { - unsigned NewReg = *R; - // Don't replace a register with itself. - if (NewReg == AntiDepReg) continue; - // Don't replace a register with one that was recently used to repair - // an anti-dependence with this AntiDepReg, because that would - // re-introduce that anti-dependence. - if (NewReg == LastNewReg[AntiDepReg]) continue; - // If NewReg is dead and NewReg's most recent def is not before - // AntiDepReg's kill, it's safe to replace AntiDepReg with NewReg. - assert(((KillIndices[AntiDepReg] == ~0u) != (DefIndices[AntiDepReg] == ~0u)) && - "Kill and Def maps aren't consistent for AntiDepReg!"); - assert(((KillIndices[NewReg] == ~0u) != (DefIndices[NewReg] == ~0u)) && - "Kill and Def maps aren't consistent for NewReg!"); - if (KillIndices[NewReg] == ~0u && - Classes[NewReg] != reinterpret_cast<TargetRegisterClass *>(-1) && - KillIndices[AntiDepReg] <= DefIndices[NewReg]) { - DOUT << "Breaking anti-dependence edge on " - << TRI->getName(AntiDepReg) - << " with " << RegRefs.count(AntiDepReg) << " references" - << " using " << TRI->getName(NewReg) << "!\n"; - - // Update the references to the old register to refer to the new - // register. 
- std::pair<std::multimap<unsigned, MachineOperand *>::iterator, - std::multimap<unsigned, MachineOperand *>::iterator> - Range = RegRefs.equal_range(AntiDepReg); - for (std::multimap<unsigned, MachineOperand *>::iterator - Q = Range.first, QE = Range.second; Q != QE; ++Q) - Q->second->setReg(NewReg); - - // We just went back in time and modified history; the - // liveness information for the anti-depenence reg is now - // inconsistent. Set the state as if it were dead. - Classes[NewReg] = Classes[AntiDepReg]; - DefIndices[NewReg] = DefIndices[AntiDepReg]; - KillIndices[NewReg] = KillIndices[AntiDepReg]; - assert(((KillIndices[NewReg] == ~0u) != - (DefIndices[NewReg] == ~0u)) && - "Kill and Def maps aren't consistent for NewReg!"); - - Classes[AntiDepReg] = 0; - DefIndices[AntiDepReg] = KillIndices[AntiDepReg]; - KillIndices[AntiDepReg] = ~0u; - assert(((KillIndices[AntiDepReg] == ~0u) != - (DefIndices[AntiDepReg] == ~0u)) && - "Kill and Def maps aren't consistent for AntiDepReg!"); - - RegRefs.erase(AntiDepReg); - Changed = true; - LastNewReg[AntiDepReg] = NewReg; - break; - } + if (unsigned NewReg = findSuitableFreeRegister(AntiDepReg, + LastNewReg[AntiDepReg], + RC)) { + DEBUG(errs() << "Breaking anti-dependence edge on " + << TRI->getName(AntiDepReg) + << " with " << RegRefs.count(AntiDepReg) << " references" + << " using " << TRI->getName(NewReg) << "!\n"); + + // Update the references to the old register to refer to the new + // register. + std::pair<std::multimap<unsigned, MachineOperand *>::iterator, + std::multimap<unsigned, MachineOperand *>::iterator> + Range = RegRefs.equal_range(AntiDepReg); + for (std::multimap<unsigned, MachineOperand *>::iterator + Q = Range.first, QE = Range.second; Q != QE; ++Q) + Q->second->setReg(NewReg); + + // We just went back in time and modified history; the + // liveness information for the anti-depenence reg is now + // inconsistent. Set the state as if it were dead. 
+ Classes[NewReg] = Classes[AntiDepReg]; + DefIndices[NewReg] = DefIndices[AntiDepReg]; + KillIndices[NewReg] = KillIndices[AntiDepReg]; + assert(((KillIndices[NewReg] == ~0u) != + (DefIndices[NewReg] == ~0u)) && + "Kill and Def maps aren't consistent for NewReg!"); + + Classes[AntiDepReg] = 0; + DefIndices[AntiDepReg] = KillIndices[AntiDepReg]; + KillIndices[AntiDepReg] = ~0u; + assert(((KillIndices[AntiDepReg] == ~0u) != + (DefIndices[AntiDepReg] == ~0u)) && + "Kill and Def maps aren't consistent for AntiDepReg!"); + + RegRefs.erase(AntiDepReg); + Changed = true; + LastNewReg[AntiDepReg] = NewReg; } } @@ -778,6 +850,177 @@ bool SchedulePostRATDList::BreakAntiDependencies() { return Changed; } +/// StartBlockForKills - Initialize register live-range state for updating kills +/// +void SchedulePostRATDList::StartBlockForKills(MachineBasicBlock *BB) { + // Initialize the indices to indicate that no registers are live. + std::fill(KillIndices, array_endof(KillIndices), ~0u); + + // Determine the live-out physregs for this block. + if (!BB->empty() && BB->back().getDesc().isReturn()) { + // In a return block, examine the function live-out regs. + for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(), + E = MRI.liveout_end(); I != E; ++I) { + unsigned Reg = *I; + KillIndices[Reg] = BB->size(); + // Repeat, for all subregs. + for (const unsigned *Subreg = TRI->getSubRegisters(Reg); + *Subreg; ++Subreg) { + KillIndices[*Subreg] = BB->size(); + } + } + } + else { + // In a non-return block, examine the live-in regs of all successors. + for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), + SE = BB->succ_end(); SI != SE; ++SI) { + for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(), + E = (*SI)->livein_end(); I != E; ++I) { + unsigned Reg = *I; + KillIndices[Reg] = BB->size(); + // Repeat, for all subregs. 
+ for (const unsigned *Subreg = TRI->getSubRegisters(Reg); + *Subreg; ++Subreg) { + KillIndices[*Subreg] = BB->size(); + } + } + } + } +} + +bool SchedulePostRATDList::ToggleKillFlag(MachineInstr *MI, + MachineOperand &MO) { + // Setting kill flag... + if (!MO.isKill()) { + MO.setIsKill(true); + return false; + } + + // If MO itself is live, clear the kill flag... + if (KillIndices[MO.getReg()] != ~0u) { + MO.setIsKill(false); + return false; + } + + // If any subreg of MO is live, then create an imp-def for that + // subreg and keep MO marked as killed. + MO.setIsKill(false); + bool AllDead = true; + const unsigned SuperReg = MO.getReg(); + for (const unsigned *Subreg = TRI->getSubRegisters(SuperReg); + *Subreg; ++Subreg) { + if (KillIndices[*Subreg] != ~0u) { + MI->addOperand(MachineOperand::CreateReg(*Subreg, + true /*IsDef*/, + true /*IsImp*/, + false /*IsKill*/, + false /*IsDead*/)); + AllDead = false; + } + } + + if(AllDead) + MO.setIsKill(true); + return false; +} + +/// FixupKills - Fix the register kill flags, they may have been made +/// incorrect by instruction reordering. +/// +void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) { + DEBUG(errs() << "Fixup kills for BB ID#" << MBB->getNumber() << '\n'); + + std::set<unsigned> killedRegs; + BitVector ReservedRegs = TRI->getReservedRegs(MF); + + StartBlockForKills(MBB); + + // Examine block from end to start... + unsigned Count = MBB->size(); + for (MachineBasicBlock::iterator I = MBB->end(), E = MBB->begin(); + I != E; --Count) { + MachineInstr *MI = --I; + + // Update liveness. Registers that are defed but not used in this + // instruction are now dead. Mark register and all subregs as they + // are completely defined. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (Reg == 0) continue; + if (!MO.isDef()) continue; + // Ignore two-addr defs. 
+ if (MI->isRegTiedToUseOperand(i)) continue; + + KillIndices[Reg] = ~0u; + + // Repeat for all subregs. + for (const unsigned *Subreg = TRI->getSubRegisters(Reg); + *Subreg; ++Subreg) { + KillIndices[*Subreg] = ~0u; + } + } + + // Examine all used registers and set/clear kill flag. When a + // register is used multiple times we only set the kill flag on + // the first use. + killedRegs.clear(); + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isUse()) continue; + unsigned Reg = MO.getReg(); + if ((Reg == 0) || ReservedRegs.test(Reg)) continue; + + bool kill = false; + if (killedRegs.find(Reg) == killedRegs.end()) { + kill = true; + // A register is not killed if any subregs are live... + for (const unsigned *Subreg = TRI->getSubRegisters(Reg); + *Subreg; ++Subreg) { + if (KillIndices[*Subreg] != ~0u) { + kill = false; + break; + } + } + + // If subreg is not live, then register is killed if it became + // live in this instruction + if (kill) + kill = (KillIndices[Reg] == ~0u); + } + + if (MO.isKill() != kill) { + bool removed = ToggleKillFlag(MI, MO); + if (removed) { + DEBUG(errs() << "Fixed <removed> in "); + } else { + DEBUG(errs() << "Fixed " << MO << " in "); + } + DEBUG(MI->dump()); + } + + killedRegs.insert(Reg); + } + + // Mark any used register (that is not using undef) and subregs as + // now live... 
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue; + unsigned Reg = MO.getReg(); + if ((Reg == 0) || ReservedRegs.test(Reg)) continue; + + KillIndices[Reg] = Count; + + for (const unsigned *Subreg = TRI->getSubRegisters(Reg); + *Subreg; ++Subreg) { + KillIndices[*Subreg] = Count; + } + } + } +} + //===----------------------------------------------------------------------===// // Top-Down Scheduling //===----------------------------------------------------------------------===// @@ -786,17 +1029,17 @@ bool SchedulePostRATDList::BreakAntiDependencies() { /// the PendingQueue if the count reaches zero. Also update its cycle bound. void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge) { SUnit *SuccSU = SuccEdge->getSUnit(); - --SuccSU->NumPredsLeft; - + #ifndef NDEBUG - if (SuccSU->NumPredsLeft < 0) { - cerr << "*** Scheduling failed! ***\n"; + if (SuccSU->NumPredsLeft == 0) { + errs() << "*** Scheduling failed! ***\n"; SuccSU->dump(this); - cerr << " has been released too many times!\n"; - assert(0); + errs() << " has been released too many times!\n"; + llvm_unreachable(0); } #endif - + --SuccSU->NumPredsLeft; + // Compute how many cycles it will be before this actually becomes // available. This is the max of the start time of all predecessors plus // their latencies. @@ -819,7 +1062,7 @@ void SchedulePostRATDList::ReleaseSuccessors(SUnit *SU) { /// count of its successors. If a successor pending count is zero, add it to /// the Available queue. 
void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { - DOUT << "*** Scheduling [" << CurCycle << "]: "; + DEBUG(errs() << "*** Scheduling [" << CurCycle << "]: "); DEBUG(SU->dump(this)); Sequence.push_back(SU); @@ -848,6 +1091,10 @@ void SchedulePostRATDList::ListScheduleTopDown() { } } + // In any cycle where we can't schedule any instructions, we must + // stall or emit a noop, depending on the target. + bool CycleHasInsts = false; + // While Available queue is not empty, grab the node with the highest // priority. If it is not ready put it back. Schedule the node. std::vector<SUnit*> NotReady; @@ -866,13 +1113,14 @@ void SchedulePostRATDList::ListScheduleTopDown() { } else if (PendingQueue[i]->getDepth() < MinDepth) MinDepth = PendingQueue[i]->getDepth(); } - - // If there are no instructions available, don't try to issue anything, and - // don't advance the hazard recognizer. - if (AvailableQueue.empty()) { - CurCycle = MinDepth != ~0u ? MinDepth : CurCycle + 1; - continue; - } + + DEBUG(errs() << "\n*** Examining Available\n"; + LatencyPriorityQueue q = AvailableQueue; + while (!q.empty()) { + SUnit *su = q.pop(); + errs() << "Height " << su->getHeight() << ": "; + su->dump(this); + }); SUnit *FoundSUnit = 0; @@ -903,27 +1151,38 @@ void SchedulePostRATDList::ListScheduleTopDown() { if (FoundSUnit) { ScheduleNodeTopDown(FoundSUnit, CurCycle); HazardRec->EmitInstruction(FoundSUnit); - - // If this is a pseudo-op node, we don't want to increment the current - // cycle. - if (FoundSUnit->Latency) // Don't increment CurCycle for pseudo-ops! - ++CurCycle; - } else if (!HasNoopHazards) { - // Otherwise, we have a pipeline stall, but no other problem, just advance - // the current cycle and try again. 
- DOUT << "*** Advancing cycle, no work to do\n"; - HazardRec->AdvanceCycle(); - ++NumStalls; - ++CurCycle; + CycleHasInsts = true; + + // If we are using the target-specific hazards, then don't + // advance the cycle time just because we schedule a node. If + // the target allows it we can schedule multiple nodes in the + // same cycle. + if (!EnablePostRAHazardAvoidance) { + if (FoundSUnit->Latency) // Don't increment CurCycle for pseudo-ops! + ++CurCycle; + } } else { - // Otherwise, we have no instructions to issue and we have instructions - // that will fault if we don't do this right. This is the case for - // processors without pipeline interlocks and other cases. - DOUT << "*** Emitting noop\n"; - HazardRec->EmitNoop(); - Sequence.push_back(0); // NULL here means noop - ++NumNoops; + if (CycleHasInsts) { + DEBUG(errs() << "*** Finished cycle " << CurCycle << '\n'); + HazardRec->AdvanceCycle(); + } else if (!HasNoopHazards) { + // Otherwise, we have a pipeline stall, but no other problem, + // just advance the current cycle and try again. + DEBUG(errs() << "*** Stall in cycle " << CurCycle << '\n'); + HazardRec->AdvanceCycle(); + ++NumStalls; + } else { + // Otherwise, we have no instructions to issue and we have instructions + // that will fault if we don't do this right. This is the case for + // processors without pipeline interlocks and other cases. 
+ DEBUG(errs() << "*** Emitting noop in cycle " << CurCycle << '\n'); + HazardRec->EmitNoop(); + Sequence.push_back(0); // NULL here means noop + ++NumNoops; + } + ++CurCycle; + CycleHasInsts = false; } } diff --git a/lib/CodeGen/PreAllocSplitting.cpp b/lib/CodeGen/PreAllocSplitting.cpp index ae60c86..8fa07d4 100644 --- a/lib/CodeGen/PreAllocSplitting.cpp +++ b/lib/CodeGen/PreAllocSplitting.cpp @@ -31,6 +31,7 @@ #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SmallPtrSet.h" @@ -67,7 +68,7 @@ namespace { MachineBasicBlock *BarrierMBB; // Barrier - Current barrier index. - unsigned BarrierIdx; + LiveIndex BarrierIdx; // CurrLI - Current live interval being split. LiveInterval *CurrLI; @@ -82,7 +83,7 @@ namespace { DenseMap<unsigned, int> IntervalSSMap; // Def2SpillMap - A map from a def instruction index to spill index. - DenseMap<unsigned, unsigned> Def2SpillMap; + DenseMap<LiveIndex, LiveIndex> Def2SpillMap; public: static char ID; @@ -91,6 +92,7 @@ namespace { virtual bool runOnMachineFunction(MachineFunction &MF); virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); AU.addRequired<LiveIntervals>(); AU.addPreserved<LiveIntervals>(); AU.addRequired<LiveStacks>(); @@ -119,33 +121,31 @@ namespace { } /// print - Implement the dump method. 
- virtual void print(std::ostream &O, const Module* M = 0) const { + virtual void print(raw_ostream &O, const Module* M = 0) const { LIs->print(O, M); } - void print(std::ostream *O, const Module* M = 0) const { - if (O) print(*O, M); - } private: MachineBasicBlock::iterator findNextEmptySlot(MachineBasicBlock*, MachineInstr*, - unsigned&); + LiveIndex&); MachineBasicBlock::iterator findSpillPoint(MachineBasicBlock*, MachineInstr*, MachineInstr*, - SmallPtrSet<MachineInstr*, 4>&, unsigned&); + SmallPtrSet<MachineInstr*, 4>&, LiveIndex&); MachineBasicBlock::iterator - findRestorePoint(MachineBasicBlock*, MachineInstr*, unsigned, - SmallPtrSet<MachineInstr*, 4>&, unsigned&); + findRestorePoint(MachineBasicBlock*, MachineInstr*, LiveIndex, + SmallPtrSet<MachineInstr*, 4>&, LiveIndex&); int CreateSpillStackSlot(unsigned, const TargetRegisterClass *); - bool IsAvailableInStack(MachineBasicBlock*, unsigned, unsigned, unsigned, - unsigned&, int&) const; + bool IsAvailableInStack(MachineBasicBlock*, unsigned, + LiveIndex, LiveIndex, + LiveIndex&, int&) const; - void UpdateSpillSlotInterval(VNInfo*, unsigned, unsigned); + void UpdateSpillSlotInterval(VNInfo*, LiveIndex, LiveIndex); bool SplitRegLiveInterval(LiveInterval*); @@ -157,7 +157,7 @@ namespace { bool Rematerialize(unsigned vreg, VNInfo* ValNo, MachineInstr* DefMI, MachineBasicBlock::iterator RestorePt, - unsigned RestoreIdx, + LiveIndex RestoreIdx, SmallPtrSet<MachineInstr*, 4>& RefsInMBB); MachineInstr* FoldSpill(unsigned vreg, const TargetRegisterClass* RC, MachineInstr* DefMI, @@ -209,11 +209,12 @@ const PassInfo *const llvm::PreAllocSplittingID = &X; /// instruction index map. If there isn't one, return end(). 
MachineBasicBlock::iterator PreAllocSplitting::findNextEmptySlot(MachineBasicBlock *MBB, MachineInstr *MI, - unsigned &SpotIndex) { + LiveIndex &SpotIndex) { MachineBasicBlock::iterator MII = MI; if (++MII != MBB->end()) { - unsigned Index = LIs->findGapBeforeInstr(LIs->getInstructionIndex(MII)); - if (Index) { + LiveIndex Index = + LIs->findGapBeforeInstr(LIs->getInstructionIndex(MII)); + if (Index != LiveIndex()) { SpotIndex = Index; return MII; } @@ -229,7 +230,7 @@ MachineBasicBlock::iterator PreAllocSplitting::findSpillPoint(MachineBasicBlock *MBB, MachineInstr *MI, MachineInstr *DefMI, SmallPtrSet<MachineInstr*, 4> &RefsInMBB, - unsigned &SpillIndex) { + LiveIndex &SpillIndex) { MachineBasicBlock::iterator Pt = MBB->begin(); MachineBasicBlock::iterator MII = MI; @@ -242,7 +243,7 @@ PreAllocSplitting::findSpillPoint(MachineBasicBlock *MBB, MachineInstr *MI, if (MII == EndPt || RefsInMBB.count(MII)) return Pt; while (MII != EndPt && !RefsInMBB.count(MII)) { - unsigned Index = LIs->getInstructionIndex(MII); + LiveIndex Index = LIs->getInstructionIndex(MII); // We can't insert the spill between the barrier (a call), and its // corresponding call frame setup. @@ -275,9 +276,9 @@ PreAllocSplitting::findSpillPoint(MachineBasicBlock *MBB, MachineInstr *MI, /// found. MachineBasicBlock::iterator PreAllocSplitting::findRestorePoint(MachineBasicBlock *MBB, MachineInstr *MI, - unsigned LastIdx, + LiveIndex LastIdx, SmallPtrSet<MachineInstr*, 4> &RefsInMBB, - unsigned &RestoreIndex) { + LiveIndex &RestoreIndex) { // FIXME: Allow spill to be inserted to the beginning of the mbb. Update mbb // begin index accordingly. MachineBasicBlock::iterator Pt = MBB->end(); @@ -298,10 +299,10 @@ PreAllocSplitting::findRestorePoint(MachineBasicBlock *MBB, MachineInstr *MI, // FIXME: Limit the number of instructions to examine to reduce // compile time? 
while (MII != EndPt) { - unsigned Index = LIs->getInstructionIndex(MII); + LiveIndex Index = LIs->getInstructionIndex(MII); if (Index > LastIdx) break; - unsigned Gap = LIs->findGapBeforeInstr(Index); + LiveIndex Gap = LIs->findGapBeforeInstr(Index); // We can't insert a restore between the barrier (a call) and its // corresponding call frame teardown. @@ -310,7 +311,7 @@ PreAllocSplitting::findRestorePoint(MachineBasicBlock *MBB, MachineInstr *MI, if (MII == EndPt || RefsInMBB.count(MII)) return Pt; ++MII; } while (MII->getOpcode() != TRI->getCallFrameDestroyOpcode()); - } else if (Gap) { + } else if (Gap != LiveIndex()) { Pt = MII; RestoreIndex = Gap; } @@ -343,7 +344,8 @@ int PreAllocSplitting::CreateSpillStackSlot(unsigned Reg, if (CurrSLI->hasAtLeastOneValue()) CurrSValNo = CurrSLI->getValNumInfo(0); else - CurrSValNo = CurrSLI->getNextValue(0, 0, false, LSs->getVNInfoAllocator()); + CurrSValNo = CurrSLI->getNextValue(LiveIndex(), 0, false, + LSs->getVNInfoAllocator()); return SS; } @@ -351,8 +353,9 @@ int PreAllocSplitting::CreateSpillStackSlot(unsigned Reg, /// slot at the specified index. bool PreAllocSplitting::IsAvailableInStack(MachineBasicBlock *DefMBB, - unsigned Reg, unsigned DefIndex, - unsigned RestoreIndex, unsigned &SpillIndex, + unsigned Reg, LiveIndex DefIndex, + LiveIndex RestoreIndex, + LiveIndex &SpillIndex, int& SS) const { if (!DefMBB) return false; @@ -360,7 +363,8 @@ PreAllocSplitting::IsAvailableInStack(MachineBasicBlock *DefMBB, DenseMap<unsigned, int>::iterator I = IntervalSSMap.find(Reg); if (I == IntervalSSMap.end()) return false; - DenseMap<unsigned, unsigned>::iterator II = Def2SpillMap.find(DefIndex); + DenseMap<LiveIndex, LiveIndex>::iterator + II = Def2SpillMap.find(DefIndex); if (II == Def2SpillMap.end()) return false; @@ -380,8 +384,8 @@ PreAllocSplitting::IsAvailableInStack(MachineBasicBlock *DefMBB, /// interval being split, and the spill and restore indicies, update the live /// interval of the spill stack slot. 
void -PreAllocSplitting::UpdateSpillSlotInterval(VNInfo *ValNo, unsigned SpillIndex, - unsigned RestoreIndex) { +PreAllocSplitting::UpdateSpillSlotInterval(VNInfo *ValNo, LiveIndex SpillIndex, + LiveIndex RestoreIndex) { assert(LIs->getMBBFromIndex(RestoreIndex) == BarrierMBB && "Expect restore in the barrier mbb"); @@ -394,8 +398,8 @@ PreAllocSplitting::UpdateSpillSlotInterval(VNInfo *ValNo, unsigned SpillIndex, } SmallPtrSet<MachineBasicBlock*, 4> Processed; - unsigned EndIdx = LIs->getMBBEndIdx(MBB); - LiveRange SLR(SpillIndex, EndIdx+1, CurrSValNo); + LiveIndex EndIdx = LIs->getMBBEndIdx(MBB); + LiveRange SLR(SpillIndex, LIs->getNextSlot(EndIdx), CurrSValNo); CurrSLI->addRange(SLR); Processed.insert(MBB); @@ -414,7 +418,7 @@ PreAllocSplitting::UpdateSpillSlotInterval(VNInfo *ValNo, unsigned SpillIndex, WorkList.pop_back(); if (Processed.count(MBB)) continue; - unsigned Idx = LIs->getMBBStartIdx(MBB); + LiveIndex Idx = LIs->getMBBStartIdx(MBB); LR = CurrLI->getLiveRangeContaining(Idx); if (LR && LR->valno == ValNo) { EndIdx = LIs->getMBBEndIdx(MBB); @@ -424,7 +428,7 @@ PreAllocSplitting::UpdateSpillSlotInterval(VNInfo *ValNo, unsigned SpillIndex, CurrSLI->addRange(SLR); } else if (LR->end > EndIdx) { // Live range extends beyond end of mbb, process successors. - LiveRange SLR(Idx, EndIdx+1, CurrSValNo); + LiveRange SLR(Idx, LIs->getNextIndex(EndIdx), CurrSValNo); CurrSLI->addRange(SLR); for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), SE = MBB->succ_end(); SI != SE; ++SI) @@ -487,12 +491,12 @@ PreAllocSplitting::PerformPHIConstruction(MachineBasicBlock::iterator UseI, } // Once we've found it, extend its VNInfo to our instruction. 
- unsigned DefIndex = LIs->getInstructionIndex(Walker); - DefIndex = LiveIntervals::getDefIndex(DefIndex); - unsigned EndIndex = LIs->getMBBEndIdx(MBB); + LiveIndex DefIndex = LIs->getInstructionIndex(Walker); + DefIndex = LIs->getDefIndex(DefIndex); + LiveIndex EndIndex = LIs->getMBBEndIdx(MBB); RetVNI = NewVNs[Walker]; - LI->addRange(LiveRange(DefIndex, EndIndex+1, RetVNI)); + LI->addRange(LiveRange(DefIndex, LIs->getNextSlot(EndIndex), RetVNI)); } else if (!ContainsDefs && ContainsUses) { SmallPtrSet<MachineInstr*, 2>& BlockUses = Uses[MBB]; @@ -524,12 +528,12 @@ PreAllocSplitting::PerformPHIConstruction(MachineBasicBlock::iterator UseI, IsTopLevel, IsIntraBlock); } - unsigned UseIndex = LIs->getInstructionIndex(Walker); - UseIndex = LiveIntervals::getUseIndex(UseIndex); - unsigned EndIndex = 0; + LiveIndex UseIndex = LIs->getInstructionIndex(Walker); + UseIndex = LIs->getUseIndex(UseIndex); + LiveIndex EndIndex; if (IsIntraBlock) { EndIndex = LIs->getInstructionIndex(UseI); - EndIndex = LiveIntervals::getUseIndex(EndIndex); + EndIndex = LIs->getUseIndex(EndIndex); } else EndIndex = LIs->getMBBEndIdx(MBB); @@ -538,12 +542,12 @@ PreAllocSplitting::PerformPHIConstruction(MachineBasicBlock::iterator UseI, RetVNI = PerformPHIConstruction(Walker, MBB, LI, Visited, Defs, Uses, NewVNs, LiveOut, Phis, false, true); - LI->addRange(LiveRange(UseIndex, EndIndex+1, RetVNI)); + LI->addRange(LiveRange(UseIndex, LIs->getNextSlot(EndIndex), RetVNI)); // FIXME: Need to set kills properly for inter-block stuff. 
- if (LI->isKill(RetVNI, UseIndex)) LI->removeKill(RetVNI, UseIndex); + if (RetVNI->isKill(UseIndex)) RetVNI->removeKill(UseIndex); if (IsIntraBlock) - LI->addKill(RetVNI, EndIndex); + RetVNI->addKill(EndIndex); } else if (ContainsDefs && ContainsUses) { SmallPtrSet<MachineInstr*, 2>& BlockDefs = Defs[MBB]; SmallPtrSet<MachineInstr*, 2>& BlockUses = Uses[MBB]; @@ -584,13 +588,13 @@ PreAllocSplitting::PerformPHIConstruction(MachineBasicBlock::iterator UseI, IsTopLevel, IsIntraBlock); } - unsigned StartIndex = LIs->getInstructionIndex(Walker); - StartIndex = foundDef ? LiveIntervals::getDefIndex(StartIndex) : - LiveIntervals::getUseIndex(StartIndex); - unsigned EndIndex = 0; + LiveIndex StartIndex = LIs->getInstructionIndex(Walker); + StartIndex = foundDef ? LIs->getDefIndex(StartIndex) : + LIs->getUseIndex(StartIndex); + LiveIndex EndIndex; if (IsIntraBlock) { EndIndex = LIs->getInstructionIndex(UseI); - EndIndex = LiveIntervals::getUseIndex(EndIndex); + EndIndex = LIs->getUseIndex(EndIndex); } else EndIndex = LIs->getMBBEndIdx(MBB); @@ -600,12 +604,12 @@ PreAllocSplitting::PerformPHIConstruction(MachineBasicBlock::iterator UseI, RetVNI = PerformPHIConstruction(Walker, MBB, LI, Visited, Defs, Uses, NewVNs, LiveOut, Phis, false, true); - LI->addRange(LiveRange(StartIndex, EndIndex+1, RetVNI)); + LI->addRange(LiveRange(StartIndex, LIs->getNextSlot(EndIndex), RetVNI)); - if (foundUse && LI->isKill(RetVNI, StartIndex)) - LI->removeKill(RetVNI, StartIndex); + if (foundUse && RetVNI->isKill(StartIndex)) + RetVNI->removeKill(StartIndex); if (IsIntraBlock) { - LI->addKill(RetVNI, EndIndex); + RetVNI->addKill(EndIndex); } } @@ -636,9 +640,10 @@ PreAllocSplitting::PerformPHIConstructionFallBack(MachineBasicBlock::iterator Us // assume that we are not intrablock here. 
if (Phis.count(MBB)) return Phis[MBB]; - unsigned StartIndex = LIs->getMBBStartIdx(MBB); + LiveIndex StartIndex = LIs->getMBBStartIdx(MBB); VNInfo *RetVNI = Phis[MBB] = - LI->getNextValue(0, /*FIXME*/ 0, false, LIs->getVNInfoAllocator()); + LI->getNextValue(LiveIndex(), /*FIXME*/ 0, false, + LIs->getVNInfoAllocator()); if (!IsIntraBlock) LiveOut[MBB] = RetVNI; @@ -680,21 +685,21 @@ PreAllocSplitting::PerformPHIConstructionFallBack(MachineBasicBlock::iterator Us for (DenseMap<MachineBasicBlock*, VNInfo*>::iterator I = IncomingVNs.begin(), E = IncomingVNs.end(); I != E; ++I) { I->second->setHasPHIKill(true); - unsigned KillIndex = LIs->getMBBEndIdx(I->first); - if (!LiveInterval::isKill(I->second, KillIndex)) - LI->addKill(I->second, KillIndex); + LiveIndex KillIndex = LIs->getMBBEndIdx(I->first); + if (!I->second->isKill(KillIndex)) + I->second->addKill(KillIndex); } } - unsigned EndIndex = 0; + LiveIndex EndIndex; if (IsIntraBlock) { EndIndex = LIs->getInstructionIndex(UseI); - EndIndex = LiveIntervals::getUseIndex(EndIndex); + EndIndex = LIs->getUseIndex(EndIndex); } else EndIndex = LIs->getMBBEndIdx(MBB); - LI->addRange(LiveRange(StartIndex, EndIndex+1, RetVNI)); + LI->addRange(LiveRange(StartIndex, LIs->getNextSlot(EndIndex), RetVNI)); if (IsIntraBlock) - LI->addKill(RetVNI, EndIndex); + RetVNI->addKill(EndIndex); // Memoize results so we don't have to recompute them. if (!IsIntraBlock) @@ -728,8 +733,8 @@ void PreAllocSplitting::ReconstructLiveInterval(LiveInterval* LI) { DE = MRI->def_end(); DI != DE; ++DI) { Defs[(*DI).getParent()].insert(&*DI); - unsigned DefIdx = LIs->getInstructionIndex(&*DI); - DefIdx = LiveIntervals::getDefIndex(DefIdx); + LiveIndex DefIdx = LIs->getInstructionIndex(&*DI); + DefIdx = LIs->getDefIndex(DefIdx); assert(DI->getOpcode() != TargetInstrInfo::PHI && "Following NewVN isPHIDef flag incorrect. 
Fix me!"); @@ -739,7 +744,7 @@ void PreAllocSplitting::ReconstructLiveInterval(LiveInterval* LI) { unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; if (TII->isMoveInstr(*DI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) if (DstReg == LI->reg) - NewVN->copy = &*DI; + NewVN->setCopy(&*DI); NewVNs[&*DI] = NewVN; } @@ -764,14 +769,32 @@ void PreAllocSplitting::ReconstructLiveInterval(LiveInterval* LI) { // Add ranges for dead defs for (MachineRegisterInfo::def_iterator DI = MRI->def_begin(LI->reg), DE = MRI->def_end(); DI != DE; ++DI) { - unsigned DefIdx = LIs->getInstructionIndex(&*DI); - DefIdx = LiveIntervals::getDefIndex(DefIdx); + LiveIndex DefIdx = LIs->getInstructionIndex(&*DI); + DefIdx = LIs->getDefIndex(DefIdx); if (LI->liveAt(DefIdx)) continue; VNInfo* DeadVN = NewVNs[&*DI]; - LI->addRange(LiveRange(DefIdx, DefIdx+1, DeadVN)); - LI->addKill(DeadVN, DefIdx); + LI->addRange(LiveRange(DefIdx, LIs->getNextSlot(DefIdx), DeadVN)); + DeadVN->addKill(DefIdx); + } + + // Update kill markers. + for (LiveInterval::vni_iterator VI = LI->vni_begin(), VE = LI->vni_end(); + VI != VE; ++VI) { + VNInfo* VNI = *VI; + for (unsigned i = 0, e = VNI->kills.size(); i != e; ++i) { + LiveIndex KillIdx = VNI->kills[i]; + if (KillIdx.isPHIIndex()) + continue; + MachineInstr *KillMI = LIs->getInstructionFromIndex(KillIdx); + if (KillMI) { + MachineOperand *KillMO = KillMI->findRegisterUseOperand(CurrLI->reg); + if (KillMO) + // It could be a dead def. 
+ KillMO->setIsKill(); + } + } } } @@ -801,14 +824,16 @@ void PreAllocSplitting::RenumberValno(VNInfo* VN) { VNsToCopy.push_back(OldVN); // Locate two-address redefinitions - for (SmallVector<unsigned, 4>::iterator KI = OldVN->kills.begin(), + for (VNInfo::KillSet::iterator KI = OldVN->kills.begin(), KE = OldVN->kills.end(); KI != KE; ++KI) { + assert(!KI->isPHIIndex() && + "VN previously reported having no PHI kills."); MachineInstr* MI = LIs->getInstructionFromIndex(*KI); unsigned DefIdx = MI->findRegisterDefOperandIdx(CurrLI->reg); if (DefIdx == ~0U) continue; if (MI->isRegTiedToUseOperand(DefIdx)) { VNInfo* NextVN = - CurrLI->findDefinedVNInfo(LiveIntervals::getDefIndex(*KI)); + CurrLI->findDefinedVNInfoForRegInt(LIs->getDefIndex(*KI)); if (NextVN == OldVN) continue; Stack.push_back(NextVN); } @@ -840,10 +865,10 @@ void PreAllocSplitting::RenumberValno(VNInfo* VN) { for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(CurrLI->reg), E = MRI->reg_end(); I != E; ++I) { MachineOperand& MO = I.getOperand(); - unsigned InstrIdx = LIs->getInstructionIndex(&*I); + LiveIndex InstrIdx = LIs->getInstructionIndex(&*I); - if ((MO.isUse() && NewLI.liveAt(LiveIntervals::getUseIndex(InstrIdx))) || - (MO.isDef() && NewLI.liveAt(LiveIntervals::getDefIndex(InstrIdx)))) + if ((MO.isUse() && NewLI.liveAt(LIs->getUseIndex(InstrIdx))) || + (MO.isDef() && NewLI.liveAt(LIs->getDefIndex(InstrIdx)))) OpsToChange.push_back(std::make_pair(&*I, I.getOperandNo())); } @@ -865,15 +890,15 @@ void PreAllocSplitting::RenumberValno(VNInfo* VN) { NumRenumbers++; } -bool PreAllocSplitting::Rematerialize(unsigned vreg, VNInfo* ValNo, +bool PreAllocSplitting::Rematerialize(unsigned VReg, VNInfo* ValNo, MachineInstr* DefMI, MachineBasicBlock::iterator RestorePt, - unsigned RestoreIdx, + LiveIndex RestoreIdx, SmallPtrSet<MachineInstr*, 4>& RefsInMBB) { MachineBasicBlock& MBB = *RestorePt->getParent(); MachineBasicBlock::iterator KillPt = BarrierMBB->end(); - unsigned KillIdx = 0; + LiveIndex 
KillIdx; if (!ValNo->isDefAccurate() || DefMI->getParent() == BarrierMBB) KillPt = findSpillPoint(BarrierMBB, Barrier, NULL, RefsInMBB, KillIdx); else @@ -882,13 +907,13 @@ bool PreAllocSplitting::Rematerialize(unsigned vreg, VNInfo* ValNo, if (KillPt == DefMI->getParent()->end()) return false; - TII->reMaterialize(MBB, RestorePt, vreg, DefMI); + TII->reMaterialize(MBB, RestorePt, VReg, 0, DefMI); LIs->InsertMachineInstrInMaps(prior(RestorePt), RestoreIdx); ReconstructLiveInterval(CurrLI); - unsigned RematIdx = LIs->getInstructionIndex(prior(RestorePt)); - RematIdx = LiveIntervals::getDefIndex(RematIdx); - RenumberValno(CurrLI->findDefinedVNInfo(RematIdx)); + LiveIndex RematIdx = LIs->getInstructionIndex(prior(RestorePt)); + RematIdx = LIs->getDefIndex(RematIdx); + RenumberValno(CurrLI->findDefinedVNInfoForRegInt(RematIdx)); ++NumSplits; ++NumRemats; @@ -943,7 +968,8 @@ MachineInstr* PreAllocSplitting::FoldSpill(unsigned vreg, if (CurrSLI->hasAtLeastOneValue()) CurrSValNo = CurrSLI->getValNumInfo(0); else - CurrSValNo = CurrSLI->getNextValue(0, 0, false, LSs->getVNInfoAllocator()); + CurrSValNo = CurrSLI->getNextValue(LiveIndex(), 0, false, + LSs->getVNInfoAllocator()); } return FMI; @@ -1033,11 +1059,7 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) { CurrLI->FindLiveRangeContaining(LIs->getUseIndex(BarrierIdx)); VNInfo *ValNo = LR->valno; - if (ValNo->isUnused()) { - // Defined by a dead def? How can this be? - assert(0 && "Val# is defined by a dead def?"); - abort(); - } + assert(!ValNo->isUnused() && "Val# is defined by a dead def?"); MachineInstr *DefMI = ValNo->isDefAccurate() ? LIs->getInstructionFromIndex(ValNo->def) : NULL; @@ -1056,7 +1078,7 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) { } // Find a point to restore the value after the barrier. 
- unsigned RestoreIndex = 0; + LiveIndex RestoreIndex; MachineBasicBlock::iterator RestorePt = findRestorePoint(BarrierMBB, Barrier, LR->end, RefsInMBB, RestoreIndex); if (RestorePt == BarrierMBB->end()) @@ -1070,7 +1092,7 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) { // Add a spill either before the barrier or after the definition. MachineBasicBlock *DefMBB = DefMI ? DefMI->getParent() : NULL; const TargetRegisterClass *RC = MRI->getRegClass(CurrLI->reg); - unsigned SpillIndex = 0; + LiveIndex SpillIndex; MachineInstr *SpillMI = NULL; int SS = -1; if (!ValNo->isDefAccurate()) { @@ -1098,7 +1120,7 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) { return false; // Def is dead. Do nothing. if ((SpillMI = FoldSpill(LI->reg, RC, DefMI, Barrier, - BarrierMBB, SS, RefsInMBB))) { + BarrierMBB, SS, RefsInMBB))) { SpillIndex = LIs->getInstructionIndex(SpillMI); } else { // Check if it's possible to insert a spill after the def MI. @@ -1114,11 +1136,9 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) { if (SpillPt == DefMBB->end()) return false; // No gap to insert spill. } - // Add spill. The store instruction kills the register if def is before - // the barrier in the barrier block. + // Add spill. SS = CreateSpillStackSlot(CurrLI->reg, RC); - TII->storeRegToStackSlot(*DefMBB, SpillPt, CurrLI->reg, - DefMBB == BarrierMBB, SS, RC); + TII->storeRegToStackSlot(*DefMBB, SpillPt, CurrLI->reg, false, SS, RC); SpillMI = prior(SpillPt); LIs->InsertMachineInstrInMaps(SpillMI, SpillIndex); } @@ -1142,15 +1162,15 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) { } // Update spill stack slot live interval. 
- UpdateSpillSlotInterval(ValNo, LIs->getUseIndex(SpillIndex)+1, + UpdateSpillSlotInterval(ValNo, LIs->getNextSlot(LIs->getUseIndex(SpillIndex)), LIs->getDefIndex(RestoreIndex)); ReconstructLiveInterval(CurrLI); - + if (!FoldedRestore) { - unsigned RestoreIdx = LIs->getInstructionIndex(prior(RestorePt)); - RestoreIdx = LiveIntervals::getDefIndex(RestoreIdx); - RenumberValno(CurrLI->findDefinedVNInfo(RestoreIdx)); + LiveIndex RestoreIdx = LIs->getInstructionIndex(prior(RestorePt)); + RestoreIdx = LIs->getDefIndex(RestoreIdx); + RenumberValno(CurrLI->findDefinedVNInfoForRegInt(RestoreIdx)); } ++NumSplits; @@ -1189,8 +1209,6 @@ PreAllocSplitting::SplitRegLiveIntervals(const TargetRegisterClass **RCs, while (!Intervals.empty()) { if (PreSplitLimit != -1 && (int)NumSplits == PreSplitLimit) break; - else if (NumSplits == 4) - Change |= Change; LiveInterval *LI = Intervals.back(); Intervals.pop_back(); bool result = SplitRegLiveInterval(LI); @@ -1236,8 +1254,8 @@ bool PreAllocSplitting::removeDeadSpills(SmallPtrSet<LiveInterval*, 8>& split) { // reaching definition (VNInfo). 
for (MachineRegisterInfo::use_iterator UI = MRI->use_begin((*LI)->reg), UE = MRI->use_end(); UI != UE; ++UI) { - unsigned index = LIs->getInstructionIndex(&*UI); - index = LiveIntervals::getUseIndex(index); + LiveIndex index = LIs->getInstructionIndex(&*UI); + index = LIs->getUseIndex(index); const LiveRange* LR = (*LI)->getLiveRangeContaining(index); VNUseCount[LR->valno].insert(&*UI); @@ -1386,7 +1404,7 @@ bool PreAllocSplitting::createsNewJoin(LiveRange* LR, if (LR->valno->hasPHIKill()) return false; - unsigned MBBEnd = LIs->getMBBEndIdx(BarrierMBB); + LiveIndex MBBEnd = LIs->getMBBEndIdx(BarrierMBB); if (LR->end < MBBEnd) return false; diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index 9e7ad67..8793df7 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -31,7 +31,9 @@ #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetFrameInfo.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" +#include "llvm/ADT/IndexedMap.h" #include "llvm/ADT/STLExtras.h" #include <climits> @@ -51,22 +53,26 @@ FunctionPass *llvm::createPrologEpilogCodeInserter() { return new PEI(); } /// frame indexes with appropriate references. /// bool PEI::runOnMachineFunction(MachineFunction &Fn) { + const Function* F = Fn.getFunction(); const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo(); RS = TRI->requiresRegisterScavenging(Fn) ? new RegScavenger() : NULL; + FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(Fn); // Get MachineModuleInfo so that we can track the construction of the // frame. if (MachineModuleInfo *MMI = getAnalysisIfAvailable<MachineModuleInfo>()) Fn.getFrameInfo()->setMachineModuleInfo(MMI); + // Calculate the MaxCallFrameSize and HasCalls variables for the function's + // frame information. Also eliminates call frame pseudo instructions. 
+ calculateCallsInformation(Fn); + // Allow the target machine to make some adjustments to the function // e.g. UsedPhysRegs before calculateCalleeSavedRegisters. TRI->processFunctionBeforeCalleeSavedScan(Fn, RS); - // Scan the function for modified callee saved registers and insert spill - // code for any callee saved registers that are modified. Also calculate - // the MaxCallFrameSize and HasCalls variables for the function's frame - // information and eliminates call frame pseudo instructions. + // Scan the function for modified callee saved registers and insert spill code + // for any callee saved registers that are modified. calculateCalleeSavedRegisters(Fn); // Determine placement of CSR spill/restore code: @@ -78,7 +84,8 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { placeCSRSpillsAndRestores(Fn); // Add the code to save and restore the callee saved registers - insertCSRSpillsAndRestores(Fn); + if (!F->hasFnAttr(Attribute::Naked)) + insertCSRSpillsAndRestores(Fn); // Allow the target machine to make final modifications to the function // before the frame layout is finalized. @@ -92,13 +99,20 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { // called functions. Because of this, calculateCalleeSavedRegisters // must be called before this function in order to set the HasCalls // and MaxCallFrameSize variables. - insertPrologEpilogCode(Fn); + if (!F->hasFnAttr(Attribute::Naked)) + insertPrologEpilogCode(Fn); // Replace all MO_FrameIndex operands with physical register references // and actual offsets. // replaceFrameIndices(Fn); + // If register scavenging is needed, as we've enabled doing it as a + // post-pass, scavenge the virtual registers that frame index elimiation + // inserted. 
+ if (TRI->requiresRegisterScavenging(Fn) && FrameIndexVirtualScavenging) + scavengeFrameVirtualRegs(Fn); + delete RS; clearAllSets(); return true; @@ -117,35 +131,24 @@ void PEI::getAnalysisUsage(AnalysisUsage &AU) const { } #endif -/// calculateCalleeSavedRegisters - Scan the function for modified callee saved -/// registers. Also calculate the MaxCallFrameSize and HasCalls variables for -/// the function's frame information and eliminates call frame pseudo -/// instructions. -/// -void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) { +/// calculateCallsInformation - Calculate the MaxCallFrameSize and HasCalls +/// variables for the function's frame information and eliminate call frame +/// pseudo instructions. +void PEI::calculateCallsInformation(MachineFunction &Fn) { const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo(); - const TargetFrameInfo *TFI = Fn.getTarget().getFrameInfo(); - // Get the callee saved register list... - const unsigned *CSRegs = RegInfo->getCalleeSavedRegs(&Fn); + unsigned MaxCallFrameSize = 0; + bool HasCalls = false; // Get the function call frame set-up and tear-down instruction opcode int FrameSetupOpcode = RegInfo->getCallFrameSetupOpcode(); int FrameDestroyOpcode = RegInfo->getCallFrameDestroyOpcode(); - // These are used to keep track the callee-save area. Initialize them. - MinCSFrameIndex = INT_MAX; - MaxCSFrameIndex = 0; - - // Early exit for targets which have no callee saved registers and no call - // frame setup/destroy pseudo instructions. - if ((CSRegs == 0 || CSRegs[0] == 0) && - FrameSetupOpcode == -1 && FrameDestroyOpcode == -1) + // Early exit for targets which have no call frame setup/destroy pseudo + // instructions. 
+ if (FrameSetupOpcode == -1 && FrameDestroyOpcode == -1) return; - unsigned MaxCallFrameSize = 0; - bool HasCalls = false; - std::vector<MachineBasicBlock::iterator> FrameSDOps; for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) @@ -157,31 +160,57 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) { if (Size > MaxCallFrameSize) MaxCallFrameSize = Size; HasCalls = true; FrameSDOps.push_back(I); + } else if (I->getOpcode() == TargetInstrInfo::INLINEASM) { + // An InlineAsm might be a call; assume it is to get the stack frame + // aligned correctly for calls. + HasCalls = true; } MachineFrameInfo *FFI = Fn.getFrameInfo(); FFI->setHasCalls(HasCalls); FFI->setMaxCallFrameSize(MaxCallFrameSize); - for (unsigned i = 0, e = FrameSDOps.size(); i != e; ++i) { - MachineBasicBlock::iterator I = FrameSDOps[i]; - // If call frames are not being included as part of the stack frame, - // and there is no dynamic allocation (therefore referencing frame slots - // off sp), leave the pseudo ops alone. We'll eliminate them later. + for (std::vector<MachineBasicBlock::iterator>::iterator + i = FrameSDOps.begin(), e = FrameSDOps.end(); i != e; ++i) { + MachineBasicBlock::iterator I = *i; + + // If call frames are not being included as part of the stack frame, and + // there is no dynamic allocation (therefore referencing frame slots off + // sp), leave the pseudo ops alone. We'll eliminate them later. if (RegInfo->hasReservedCallFrame(Fn) || RegInfo->hasFP(Fn)) RegInfo->eliminateCallFramePseudoInstr(Fn, *I->getParent(), I); } +} + + +/// calculateCalleeSavedRegisters - Scan the function for modified callee saved +/// registers. 
+void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) { + const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo(); + const TargetFrameInfo *TFI = Fn.getTarget().getFrameInfo(); + MachineFrameInfo *FFI = Fn.getFrameInfo(); + + // Get the callee saved register list... + const unsigned *CSRegs = RegInfo->getCalleeSavedRegs(&Fn); + + // These are used to keep track the callee-save area. Initialize them. + MinCSFrameIndex = INT_MAX; + MaxCSFrameIndex = 0; + + // Early exit for targets which have no callee saved registers. + if (CSRegs == 0 || CSRegs[0] == 0) + return; - // Now figure out which *callee saved* registers are modified by the current + // Figure out which *callee saved* registers are modified by the current // function, thus needing to be saved and restored in the prolog/epilog. - // - const TargetRegisterClass* const *CSRegClasses = + const TargetRegisterClass * const *CSRegClasses = RegInfo->getCalleeSavedRegClasses(&Fn); + std::vector<CalleeSavedInfo> CSI; for (unsigned i = 0; CSRegs[i]; ++i) { unsigned Reg = CSRegs[i]; if (Fn.getRegInfo().isPhysRegUsed(Reg)) { - // If the reg is modified, save it! + // If the reg is modified, save it! CSI.push_back(CalleeSavedInfo(Reg, CSRegClasses[i])); } else { for (const unsigned *AliasSet = RegInfo->getAliasSet(Reg); @@ -198,39 +227,47 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) { return; // Early exit if no callee saved registers are modified! unsigned NumFixedSpillSlots; - const std::pair<unsigned,int> *FixedSpillSlots = + const TargetFrameInfo::SpillSlot *FixedSpillSlots = TFI->getCalleeSavedSpillSlots(NumFixedSpillSlots); // Now that we know which registers need to be saved and restored, allocate // stack slots for them. 
- for (unsigned i = 0, e = CSI.size(); i != e; ++i) { - unsigned Reg = CSI[i].getReg(); - const TargetRegisterClass *RC = CSI[i].getRegClass(); + for (std::vector<CalleeSavedInfo>::iterator + I = CSI.begin(), E = CSI.end(); I != E; ++I) { + unsigned Reg = I->getReg(); + const TargetRegisterClass *RC = I->getRegClass(); + + int FrameIdx; + if (RegInfo->hasReservedSpillSlot(Fn, Reg, FrameIdx)) { + I->setFrameIdx(FrameIdx); + continue; + } // Check to see if this physreg must be spilled to a particular stack slot // on this target. - const std::pair<unsigned,int> *FixedSlot = FixedSpillSlots; + const TargetFrameInfo::SpillSlot *FixedSlot = FixedSpillSlots; while (FixedSlot != FixedSpillSlots+NumFixedSpillSlots && - FixedSlot->first != Reg) + FixedSlot->Reg != Reg) ++FixedSlot; - int FrameIdx; - if (FixedSlot == FixedSpillSlots+NumFixedSpillSlots) { + if (FixedSlot == FixedSpillSlots + NumFixedSpillSlots) { // Nope, just spill it anywhere convenient. unsigned Align = RC->getAlignment(); unsigned StackAlign = TFI->getStackAlignment(); - // We may not be able to sastify the desired alignment specification of - // the TargetRegisterClass if the stack alignment is smaller. - // Use the min. + + // We may not be able to satisfy the desired alignment specification of + // the TargetRegisterClass if the stack alignment is smaller. Use the + // min. Align = std::min(Align, StackAlign); FrameIdx = FFI->CreateStackObject(RC->getSize(), Align); if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx; if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx; } else { // Spill it to the stack where we must. 
- FrameIdx = FFI->CreateFixedObject(RC->getSize(), FixedSlot->second); + FrameIdx = FFI->CreateFixedObject(RC->getSize(), FixedSlot->Offset); } - CSI[i].setFrameIdx(FrameIdx); + + I->setFrameIdx(FrameIdx); } FFI->setCalleeSavedInfo(CSI); @@ -244,6 +281,8 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) { MachineFrameInfo *FFI = Fn.getFrameInfo(); const std::vector<CalleeSavedInfo> &CSI = FFI->getCalleeSavedInfo(); + FFI->setCalleeSavedInfoValid(true); + // Early exit if no callee saved registers are modified! if (CSI.empty()) return; @@ -403,8 +442,7 @@ static inline void AdjustStackOffset(MachineFrameInfo *FFI, int FrameIdx, bool StackGrowsDown, int64_t &Offset, unsigned &MaxAlign) { - // If stack grows down, we need to add size of find the lowest address of the - // object. + // If the stack grows down, add the object size to find the lowest address. if (StackGrowsDown) Offset += FFI->getObjectSize(FrameIdx); @@ -437,16 +475,17 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { // Loop over all of the stack objects, assigning sequential addresses... MachineFrameInfo *FFI = Fn.getFrameInfo(); - unsigned MaxAlign = FFI->getMaxAlignment(); + unsigned MaxAlign = 1; // Start at the beginning of the local area. // The Offset is the distance from the stack top in the direction // of stack growth -- so it's always nonnegative. - int64_t Offset = TFI.getOffsetOfLocalArea(); + int LocalAreaOffset = TFI.getOffsetOfLocalArea(); if (StackGrowsDown) - Offset = -Offset; - assert(Offset >= 0 + LocalAreaOffset = -LocalAreaOffset; + assert(LocalAreaOffset >= 0 && "Local area offset should be in direction of stack growth"); + int64_t Offset = LocalAreaOffset; // If there are fixed sized objects that are preallocated in the local area, // non-fixed objects can't be allocated right at the start of local area. 
@@ -538,32 +577,38 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { AdjustStackOffset(FFI, SFI, StackGrowsDown, Offset, MaxAlign); } - // Round up the size to a multiple of the alignment, but only if there are - // calls or alloca's in the function. This ensures that any calls to - // subroutines have their stack frames suitable aligned. - // Also do this if we need runtime alignment of the stack. In this case - // offsets will be relative to SP not FP; round up the stack size so this - // works. - if (!RegInfo->targetHandlesStackFrameRounding() && - (FFI->hasCalls() || FFI->hasVarSizedObjects() || - (RegInfo->needsStackRealignment(Fn) && - FFI->getObjectIndexEnd() != 0))) { + if (!RegInfo->targetHandlesStackFrameRounding()) { // If we have reserved argument space for call sites in the function // immediately on entry to the current function, count it as part of the // overall stack size. - if (RegInfo->hasReservedCallFrame(Fn)) + if (FFI->hasCalls() && RegInfo->hasReservedCallFrame(Fn)) Offset += FFI->getMaxCallFrameSize(); - unsigned AlignMask = std::max(TFI.getStackAlignment(),MaxAlign) - 1; + // Round up the size to a multiple of the alignment. If the function has + // any calls or alloca's, align to the target's StackAlignment value to + // ensure that the callee's frame or the alloca data is suitably aligned; + // otherwise, for leaf functions, align to the TransientStackAlignment + // value. + unsigned StackAlign; + if (FFI->hasCalls() || FFI->hasVarSizedObjects() || + (RegInfo->needsStackRealignment(Fn) && FFI->getObjectIndexEnd() != 0)) + StackAlign = TFI.getStackAlignment(); + else + StackAlign = TFI.getTransientStackAlignment(); + // If the frame pointer is eliminated, all frame offsets will be relative + // to SP not FP; align to MaxAlign so this works. 
+ StackAlign = std::max(StackAlign, MaxAlign); + unsigned AlignMask = StackAlign - 1; Offset = (Offset + AlignMask) & ~uint64_t(AlignMask); } // Update frame info to pretend that this is part of the stack... - FFI->setStackSize(Offset+TFI.getOffsetOfLocalArea()); + FFI->setStackSize(Offset - LocalAreaOffset); // Remember the required stack alignment in case targets need it to perform // dynamic stack alignment. - FFI->setMaxAlignment(MaxAlign); + if (MaxAlign > FFI->getMaxAlignment()) + FFI->setMaxAlignment(MaxAlign); } @@ -604,14 +649,9 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) { for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) { int SPAdj = 0; // SP offset due to call frame setup / destroy. - if (RS) RS->enterBasicBlock(BB); + if (RS && !FrameIndexVirtualScavenging) RS->enterBasicBlock(BB); for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) { - if (I->getOpcode() == TargetInstrInfo::DECLARE) { - // Ignore it. - ++I; - continue; - } if (I->getOpcode() == FrameSetupOpcode || I->getOpcode() == FrameDestroyOpcode) { @@ -654,8 +694,16 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) { // If this instruction has a FrameIndex operand, we need to // use that target machine register info object to eliminate // it. - - TRI.eliminateFrameIndex(MI, SPAdj, RS); + int Value; + unsigned VReg = + TRI.eliminateFrameIndex(MI, SPAdj, &Value, + FrameIndexVirtualScavenging ? NULL : RS); + if (VReg) { + assert (FrameIndexVirtualScavenging && + "Not scavenging, but virtual returned from " + "eliminateFrameIndex()!"); + FrameConstantRegMap[VReg] = FrameConstantEntry(Value, SPAdj); + } // Reset the iterator if we were at the beginning of the BB. if (AtBeginning) { @@ -670,10 +718,170 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) { if (DoIncr && I != BB->end()) ++I; // Update register states. 
- if (RS && MI) RS->forward(MI); + if (RS && !FrameIndexVirtualScavenging && MI) RS->forward(MI); } assert(SPAdj == 0 && "Unbalanced call frame setup / destroy pairs?"); } } +/// findLastUseReg - find the killing use of the specified register within +/// the instruciton range. Return the operand number of the kill in Operand. +static MachineBasicBlock::iterator +findLastUseReg(MachineBasicBlock::iterator I, MachineBasicBlock::iterator ME, + unsigned Reg, unsigned *Operand) { + // Scan forward to find the last use of this virtual register + for (++I; I != ME; ++I) { + MachineInstr *MI = I; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) + if (MI->getOperand(i).isReg()) { + unsigned OpReg = MI->getOperand(i).getReg(); + if (OpReg == 0 || !TargetRegisterInfo::isVirtualRegister(OpReg)) + continue; + assert (OpReg == Reg + && "overlapping use of scavenged index register!"); + // If this is the killing use, we're done + if (MI->getOperand(i).isKill()) { + if (Operand) + *Operand = i; + return I; + } + } + } + // If we hit the end of the basic block, there was no kill of + // the virtual register, which is wrong. + assert (0 && "scavenged index register never killed!"); + return ME; +} + +/// scavengeFrameVirtualRegs - Replace all frame index virtual registers +/// with physical registers. Use the register scavenger to find an +/// appropriate register to use. +void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) { + // Run through the instructions and find any virtual registers. + for (MachineFunction::iterator BB = Fn.begin(), + E = Fn.end(); BB != E; ++BB) { + RS->enterBasicBlock(BB); + + unsigned CurrentVirtReg = 0; + unsigned CurrentScratchReg = 0; + bool havePrevValue = false; + unsigned PrevScratchReg = 0; + int PrevValue; + MachineInstr *PrevLastUseMI = NULL; + unsigned PrevLastUseOp = 0; + bool trackingCurrentValue = false; + int SPAdj = 0; + int Value = 0; + + // The instruction stream may change in the loop, so check BB->end() + // directly. 
+ for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) { + MachineInstr *MI = I; + // Likewise, call getNumOperands() each iteration, as the MI may change + // inside the loop (with 'i' updated accordingly). + for (unsigned i = 0; i != MI->getNumOperands(); ++i) + if (MI->getOperand(i).isReg()) { + MachineOperand &MO = MI->getOperand(i); + unsigned Reg = MO.getReg(); + if (Reg == 0) + continue; + if (!TargetRegisterInfo::isVirtualRegister(Reg)) { + // If we have an active scavenged register, we shouldn't be + // seeing any references to it. + assert (Reg != CurrentScratchReg + && "overlapping use of scavenged frame index register!"); + + // If we have a previous scratch reg, check and see if anything + // here kills whatever value is in there. + if (Reg == PrevScratchReg) { + if (MO.isUse()) { + // Two-address operands implicitly kill + if (MO.isKill() || MI->isRegTiedToDefOperand(i)) { + havePrevValue = false; + PrevScratchReg = 0; + } + } else { + assert (MO.isDef()); + havePrevValue = false; + PrevScratchReg = 0; + } + } + continue; + } + + // Have we already allocated a scratch register for this virtual? + if (Reg != CurrentVirtReg) { + // When we first encounter a new virtual register, it + // must be a definition. + assert(MI->getOperand(i).isDef() && + "frame index virtual missing def!"); + // We can't have nested virtual register live ranges because + // there's only a guarantee of one scavenged register at a time. + assert (CurrentVirtReg == 0 && + "overlapping frame index virtual registers!"); + + // If the target gave us information about what's in the register, + // we can use that to re-use scratch regs. 
+ DenseMap<unsigned, FrameConstantEntry>::iterator Entry = + FrameConstantRegMap.find(Reg); + trackingCurrentValue = Entry != FrameConstantRegMap.end(); + if (trackingCurrentValue) { + SPAdj = (*Entry).second.second; + Value = (*Entry).second.first; + } else + SPAdj = Value = 0; + + // If the scratch register from the last allocation is still + // available, see if the value matches. If it does, just re-use it. + if (trackingCurrentValue && havePrevValue && PrevValue == Value) { + // FIXME: This assumes that the instructions in the live range + // for the virtual register are exclusively for the purpose + // of populating the value in the register. That's reasonable + // for these frame index registers, but it's still a very, very + // strong assumption. Perhaps this implies that the frame index + // elimination should be before register allocation, with + // conservative heuristics since we'll know less then, and + // the reuse calculations done directly when doing the code-gen? + + // Find the last use of the new virtual register. Remove all + // instruction between here and there, and update the current + // instruction to reference the last use insn instead. + MachineBasicBlock::iterator LastUseMI = + findLastUseReg(I, BB->end(), Reg, &i); + // Remove all instructions up 'til the last use, since they're + // just calculating the value we already have. + BB->erase(I, LastUseMI); + MI = I = LastUseMI; + + CurrentScratchReg = PrevScratchReg; + // Extend the live range of the register + PrevLastUseMI->getOperand(PrevLastUseOp).setIsKill(false); + RS->setUsed(CurrentScratchReg); + } else { + CurrentVirtReg = Reg; + const TargetRegisterClass *RC = Fn.getRegInfo().getRegClass(Reg); + CurrentScratchReg = RS->FindUnusedReg(RC); + if (CurrentScratchReg == 0) + // No register is "free". Scavenge a register. 
+ CurrentScratchReg = RS->scavengeRegister(RC, I, SPAdj); + + PrevValue = Value; + } + } + assert (CurrentScratchReg && "Missing scratch register!"); + MI->getOperand(i).setReg(CurrentScratchReg); + + // If this is the last use of the register, stop tracking it. + if (MI->getOperand(i).isKill()) { + PrevScratchReg = CurrentScratchReg; + PrevLastUseMI = MI; + PrevLastUseOp = i; + CurrentScratchReg = CurrentVirtReg = 0; + havePrevValue = trackingCurrentValue; + } + } + RS->forward(MI); + } + } +} diff --git a/lib/CodeGen/PrologEpilogInserter.h b/lib/CodeGen/PrologEpilogInserter.h index c158dd8..931f1eb 100644 --- a/lib/CodeGen/PrologEpilogInserter.h +++ b/lib/CodeGen/PrologEpilogInserter.h @@ -27,6 +27,7 @@ #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/ADT/SparseBitVector.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/Target/TargetRegisterInfo.h" namespace llvm { class RegScavenger; @@ -93,6 +94,17 @@ namespace llvm { // functions. bool ShrinkWrapThisFunction; + // Flag to control whether to use the register scavenger to resolve + // frame index materialization registers. Set according to + // TRI->requiresFrameIndexScavenging() for the curren function. + bool FrameIndexVirtualScavenging; + + // When using the scavenger post-pass to resolve frame reference + // materialization registers, maintain a map of the registers to + // the constant value and SP adjustment associated with it. + typedef std::pair<int, int> FrameConstantEntry; + DenseMap<unsigned, FrameConstantEntry> FrameConstantRegMap; + #ifndef NDEBUG // Machine function handle. 
MachineFunction* MF; @@ -118,10 +130,12 @@ namespace llvm { CSRegBlockMap &prevRestores); void placeSpillsAndRestores(MachineFunction &Fn); void placeCSRSpillsAndRestores(MachineFunction &Fn); + void calculateCallsInformation(MachineFunction &Fn); void calculateCalleeSavedRegisters(MachineFunction &Fn); void insertCSRSpillsAndRestores(MachineFunction &Fn); void calculateFrameObjectOffsets(MachineFunction &Fn); void replaceFrameIndices(MachineFunction &Fn); + void scavengeFrameVirtualRegs(MachineFunction &Fn); void insertPrologEpilogCode(MachineFunction &Fn); // Initialize DFA sets, called before iterations. diff --git a/lib/CodeGen/PseudoSourceValue.cpp b/lib/CodeGen/PseudoSourceValue.cpp index b4c20e6..00c5d46 100644 --- a/lib/CodeGen/PseudoSourceValue.cpp +++ b/lib/CodeGen/PseudoSourceValue.cpp @@ -15,6 +15,7 @@ #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/DerivedTypes.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/raw_ostream.h" #include <map> @@ -38,15 +39,16 @@ static const char *const PSVNames[] = { "ConstantPool" }; +// FIXME: THIS IS A HACK!!!! +// Eventually these should be uniqued on LLVMContext rather than in a managed +// static. For now, we can safely use the global context for the time being to +// squeak by. 
PseudoSourceValue::PseudoSourceValue() : - Value(PointerType::getUnqual(Type::Int8Ty), PseudoSourceValueVal) {} + Value(Type::getInt8PtrTy(getGlobalContext()), + PseudoSourceValueVal) {} -void PseudoSourceValue::dump() const { - print(errs()); errs() << '\n'; -} - -void PseudoSourceValue::print(raw_ostream &OS) const { - OS << PSVNames[this - *PSVs]; +void PseudoSourceValue::printCustom(raw_ostream &O) const { + O << PSVNames[this - *PSVs]; } namespace { @@ -61,7 +63,7 @@ namespace { virtual bool isConstant(const MachineFrameInfo *MFI) const; - virtual void print(raw_ostream &OS) const { + virtual void printCustom(raw_ostream &OS) const { OS << "FixedStack" << FI; } }; @@ -83,7 +85,7 @@ bool PseudoSourceValue::isConstant(const MachineFrameInfo *) const { this == getConstantPool() || this == getJumpTable()) return true; - assert(0 && "Unknown PseudoSourceValue!"); + llvm_unreachable("Unknown PseudoSourceValue!"); return false; } diff --git a/lib/CodeGen/README.txt b/lib/CodeGen/README.txt index 64374ce..b655dda4 100644 --- a/lib/CodeGen/README.txt +++ b/lib/CodeGen/README.txt @@ -30,44 +30,6 @@ It also increase the likelyhood the store may become dead. //===---------------------------------------------------------------------===// -I think we should have a "hasSideEffects" flag (which is automatically set for -stuff that "isLoad" "isCall" etc), and the remat pass should eventually be able -to remat any instruction that has no side effects, if it can handle it and if -profitable. - -For now, I'd suggest having the remat stuff work like this: - -1. I need to spill/reload this thing. -2. Check to see if it has side effects. -3. Check to see if it is simple enough: e.g. it only has one register -destination and no register input. -4. If so, clone the instruction, do the xform, etc. - -Advantages of this are: - -1. the .td file describes the behavior of the instructions, not the way the - algorithm should work. -2. 
as remat gets smarter in the future, we shouldn't have to be changing the .td - files. -3. it is easier to explain what the flag means in the .td file, because you - don't have to pull in the explanation of how the current remat algo works. - -Some potential added complexities: - -1. Some instructions have to be glued to it's predecessor or successor. All of - the PC relative instructions and condition code setting instruction. We could - mark them as hasSideEffects, but that's not quite right. PC relative loads - from constantpools can be remat'ed, for example. But it requires more than - just cloning the instruction. Some instructions can be remat'ed but it - expands to more than one instruction. But allocator will have to make a - decision. - -4. As stated in 3, not as simple as cloning in some cases. The target will have - to decide how to remat it. For example, an ARM 2-piece constant generation - instruction is remat'ed as a load from constantpool. - -//===---------------------------------------------------------------------===// - bb27 ... ... %reg1037 = ADDri %reg1039, 1 @@ -206,3 +168,32 @@ Stack coloring improvments: not spill slots. 2. Reorder objects to fill in gaps between objects. e.g. 4, 1, <gap>, 4, 1, 1, 1, <gap>, 4 => 4, 1, 1, 1, 1, 4, 4 + +//===---------------------------------------------------------------------===// + +The scheduler should be able to sort nearby instructions by their address. For +example, in an expanded memset sequence it's not uncommon to see code like this: + + movl $0, 4(%rdi) + movl $0, 8(%rdi) + movl $0, 12(%rdi) + movl $0, 0(%rdi) + +Each of the stores is independent, and the scheduler is currently making an +arbitrary decision about the order. 
+ +//===---------------------------------------------------------------------===// + +Another opportunitiy in this code is that the $0 could be moved to a register: + + movl $0, 4(%rdi) + movl $0, 8(%rdi) + movl $0, 12(%rdi) + movl $0, 0(%rdi) + +This would save substantial code size, especially for longer sequences like +this. It would be easy to have a rule telling isel to avoid matching MOV32mi +if the immediate has more than some fixed number of uses. It's more involved +to teach the register allocator how to do late folding to recover from +excessive register pressure. + diff --git a/lib/CodeGen/RegAllocLinearScan.cpp b/lib/CodeGen/RegAllocLinearScan.cpp index 904b4cb..5d58ea9 100644 --- a/lib/CodeGen/RegAllocLinearScan.cpp +++ b/lib/CodeGen/RegAllocLinearScan.cpp @@ -33,8 +33,10 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/Support/Debug.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include <algorithm> #include <set> #include <queue> @@ -142,6 +144,7 @@ namespace { } virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); AU.addRequired<LiveIntervals>(); if (StrongPHIElim) AU.addRequiredID(StrongPHIEliminationID); @@ -173,11 +176,11 @@ namespace { /// processActiveIntervals - expire old intervals and move non-overlapping /// ones to the inactive list. - void processActiveIntervals(unsigned CurPoint); + void processActiveIntervals(LiveIndex CurPoint); /// processInactiveIntervals - expire old intervals and move overlapping /// ones to the active list. - void processInactiveIntervals(unsigned CurPoint); + void processInactiveIntervals(LiveIndex CurPoint); /// hasNextReloadInterval - Return the next liveinterval that's being /// defined by a reload from the same SS as the specified one. 
@@ -230,12 +233,12 @@ namespace { bool Error = false; for (unsigned i = 0, e = tri_->getNumRegs(); i != e; ++i) { if (regUse_[i] != 0) { - cerr << tri_->getName(i) << " is still in use!\n"; + errs() << tri_->getName(i) << " is still in use!\n"; Error = true; } } if (Error) - abort(); + llvm_unreachable(0); #endif regUse_.clear(); regUseBackUp_.clear(); @@ -295,15 +298,20 @@ namespace { template <typename ItTy> void printIntervals(const char* const str, ItTy i, ItTy e) const { - if (str) DOUT << str << " intervals:\n"; - for (; i != e; ++i) { - DOUT << "\t" << *i->first << " -> "; - unsigned reg = i->first->reg; - if (TargetRegisterInfo::isVirtualRegister(reg)) { - reg = vrm_->getPhys(reg); - } - DOUT << tri_->getName(reg) << '\n'; - } + DEBUG({ + if (str) + errs() << str << " intervals:\n"; + + for (; i != e; ++i) { + errs() << "\t" << *i->first << " -> "; + + unsigned reg = i->first->reg; + if (TargetRegisterInfo::isVirtualRegister(reg)) + reg = vrm_->getPhys(reg); + + errs() << tri_->getName(reg) << '\n'; + } + }); } }; char RALinScan::ID = 0; @@ -358,7 +366,8 @@ unsigned RALinScan::attemptTrivialCoalescing(LiveInterval &cur, unsigned Reg) { return Reg; VNInfo *vni = cur.begin()->valno; - if (!vni->def || vni->isUnused() || !vni->isDefAccurate()) + if ((vni->def == LiveIndex()) || + vni->isUnused() || !vni->isDefAccurate()) return Reg; MachineInstr *CopyMI = li_->getInstructionFromIndex(vni->def); unsigned SrcReg, DstReg, SrcSubReg, DstSubReg, PhysReg; @@ -380,18 +389,18 @@ unsigned RALinScan::attemptTrivialCoalescing(LiveInterval &cur, unsigned Reg) { // Try to coalesce. if (!li_->conflictsWithPhysRegDef(cur, *vrm_, PhysReg)) { - DOUT << "Coalescing: " << cur << " -> " << tri_->getName(PhysReg) - << '\n'; + DEBUG(errs() << "Coalescing: " << cur << " -> " << tri_->getName(PhysReg) + << '\n'); vrm_->clearVirt(cur.reg); vrm_->assignVirt2Phys(cur.reg, PhysReg); // Remove unnecessary kills since a copy does not clobber the register. 
if (li_->hasInterval(SrcReg)) { LiveInterval &SrcLI = li_->getInterval(SrcReg); - for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(cur.reg), - E = mri_->reg_end(); I != E; ++I) { + for (MachineRegisterInfo::use_iterator I = mri_->use_begin(cur.reg), + E = mri_->use_end(); I != E; ++I) { MachineOperand &O = I.getOperand(); - if (!O.isUse() || !O.isKill()) + if (!O.isKill()) continue; MachineInstr *MI = &*I; if (SrcLI.liveAt(li_->getDefIndex(li_->getInstructionIndex(MI)))) @@ -478,24 +487,25 @@ void RALinScan::initIntervalSets() } } -void RALinScan::linearScan() -{ +void RALinScan::linearScan() { // linear scan algorithm - DOUT << "********** LINEAR SCAN **********\n"; - DOUT << "********** Function: " << mf_->getFunction()->getName() << '\n'; - - DEBUG(printIntervals("fixed", fixed_.begin(), fixed_.end())); + DEBUG({ + errs() << "********** LINEAR SCAN **********\n" + << "********** Function: " + << mf_->getFunction()->getName() << '\n'; + printIntervals("fixed", fixed_.begin(), fixed_.end()); + }); while (!unhandled_.empty()) { // pick the interval with the earliest start point LiveInterval* cur = unhandled_.top(); unhandled_.pop(); ++NumIters; - DOUT << "\n*** CURRENT ***: " << *cur << '\n'; + DEBUG(errs() << "\n*** CURRENT ***: " << *cur << '\n'); if (!cur->empty()) { - processActiveIntervals(cur->beginNumber()); - processInactiveIntervals(cur->beginNumber()); + processActiveIntervals(cur->beginIndex()); + processInactiveIntervals(cur->beginIndex()); assert(TargetRegisterInfo::isVirtualRegister(cur->reg) && "Can only allocate virtual registers!"); @@ -506,15 +516,17 @@ void RALinScan::linearScan() // assign it one. 
assignRegOrStackSlotAtInterval(cur); - DEBUG(printIntervals("active", active_.begin(), active_.end())); - DEBUG(printIntervals("inactive", inactive_.begin(), inactive_.end())); + DEBUG({ + printIntervals("active", active_.begin(), active_.end()); + printIntervals("inactive", inactive_.begin(), inactive_.end()); + }); } // Expire any remaining active intervals while (!active_.empty()) { IntervalPtr &IP = active_.back(); unsigned reg = IP.first->reg; - DOUT << "\tinterval " << *IP.first << " expired\n"; + DEBUG(errs() << "\tinterval " << *IP.first << " expired\n"); assert(TargetRegisterInfo::isVirtualRegister(reg) && "Can only allocate virtual registers!"); reg = vrm_->getPhys(reg); @@ -523,9 +535,11 @@ void RALinScan::linearScan() } // Expire any remaining inactive intervals - DEBUG(for (IntervalPtrs::reverse_iterator - i = inactive_.rbegin(); i != inactive_.rend(); ++i) - DOUT << "\tinterval " << *i->first << " expired\n"); + DEBUG({ + for (IntervalPtrs::reverse_iterator + i = inactive_.rbegin(); i != inactive_.rend(); ++i) + errs() << "\tinterval " << *i->first << " expired\n"; + }); inactive_.clear(); // Add live-ins to every BB except for entry. Also perform trivial coalescing. @@ -560,7 +574,7 @@ void RALinScan::linearScan() } } - DOUT << *vrm_; + DEBUG(errs() << *vrm_); // Look for physical registers that end up not being allocated even though // register allocator had to spill other registers in its register class. @@ -572,9 +586,9 @@ void RALinScan::linearScan() /// processActiveIntervals - expire old intervals and move non-overlapping ones /// to the inactive list. 
-void RALinScan::processActiveIntervals(unsigned CurPoint) +void RALinScan::processActiveIntervals(LiveIndex CurPoint) { - DOUT << "\tprocessing active intervals:\n"; + DEBUG(errs() << "\tprocessing active intervals:\n"); for (unsigned i = 0, e = active_.size(); i != e; ++i) { LiveInterval *Interval = active_[i].first; @@ -584,7 +598,7 @@ void RALinScan::processActiveIntervals(unsigned CurPoint) IntervalPos = Interval->advanceTo(IntervalPos, CurPoint); if (IntervalPos == Interval->end()) { // Remove expired intervals. - DOUT << "\t\tinterval " << *Interval << " expired\n"; + DEBUG(errs() << "\t\tinterval " << *Interval << " expired\n"); assert(TargetRegisterInfo::isVirtualRegister(reg) && "Can only allocate virtual registers!"); reg = vrm_->getPhys(reg); @@ -597,7 +611,7 @@ void RALinScan::processActiveIntervals(unsigned CurPoint) } else if (IntervalPos->start > CurPoint) { // Move inactive intervals to inactive list. - DOUT << "\t\tinterval " << *Interval << " inactive\n"; + DEBUG(errs() << "\t\tinterval " << *Interval << " inactive\n"); assert(TargetRegisterInfo::isVirtualRegister(reg) && "Can only allocate virtual registers!"); reg = vrm_->getPhys(reg); @@ -618,9 +632,9 @@ void RALinScan::processActiveIntervals(unsigned CurPoint) /// processInactiveIntervals - expire old intervals and move overlapping /// ones to the active list. -void RALinScan::processInactiveIntervals(unsigned CurPoint) +void RALinScan::processInactiveIntervals(LiveIndex CurPoint) { - DOUT << "\tprocessing inactive intervals:\n"; + DEBUG(errs() << "\tprocessing inactive intervals:\n"); for (unsigned i = 0, e = inactive_.size(); i != e; ++i) { LiveInterval *Interval = inactive_[i].first; @@ -630,7 +644,7 @@ void RALinScan::processInactiveIntervals(unsigned CurPoint) IntervalPos = Interval->advanceTo(IntervalPos, CurPoint); if (IntervalPos == Interval->end()) { // remove expired intervals. 
- DOUT << "\t\tinterval " << *Interval << " expired\n"; + DEBUG(errs() << "\t\tinterval " << *Interval << " expired\n"); // Pop off the end of the list. inactive_[i] = inactive_.back(); @@ -638,7 +652,7 @@ void RALinScan::processInactiveIntervals(unsigned CurPoint) --i; --e; } else if (IntervalPos->start <= CurPoint) { // move re-activated intervals in active list - DOUT << "\t\tinterval " << *Interval << " active\n"; + DEBUG(errs() << "\t\tinterval " << *Interval << " active\n"); assert(TargetRegisterInfo::isVirtualRegister(reg) && "Can only allocate virtual registers!"); reg = vrm_->getPhys(reg); @@ -699,7 +713,7 @@ FindIntervalInVector(RALinScan::IntervalPtrs &IP, LiveInterval *LI) { return IP.end(); } -static void RevertVectorIteratorsTo(RALinScan::IntervalPtrs &V, unsigned Point){ +static void RevertVectorIteratorsTo(RALinScan::IntervalPtrs &V, LiveIndex Point){ for (unsigned i = 0, e = V.size(); i != e; ++i) { RALinScan::IntervalPtr &IP = V[i]; LiveInterval::iterator I = std::upper_bound(IP.first->begin(), @@ -725,7 +739,8 @@ static void addStackInterval(LiveInterval *cur, LiveStacks *ls_, if (SI.hasAtLeastOneValue()) VNI = SI.getValNumInfo(0); else - VNI = SI.getNextValue(0, 0, false, ls_->getVNInfoAllocator()); + VNI = SI.getNextValue(LiveIndex(), 0, false, + ls_->getVNInfoAllocator()); LiveInterval &RI = li_->getInterval(cur->reg); // FIXME: This may be overly conservative. @@ -764,10 +779,12 @@ void RALinScan::findIntervalsToSpill(LiveInterval *cur, float Conflicts[3] = { 0.0f, 0.0f, 0.0f }; SmallVector<LiveInterval*, 8> SLIs[3]; - DOUT << "\tConsidering " << NumCands << " candidates: "; - DEBUG(for (unsigned i = 0; i != NumCands; ++i) - DOUT << tri_->getName(Candidates[i].first) << " "; - DOUT << "\n";); + DEBUG({ + errs() << "\tConsidering " << NumCands << " candidates: "; + for (unsigned i = 0; i != NumCands; ++i) + errs() << tri_->getName(Candidates[i].first) << " "; + errs() << "\n"; + }); // Calculate the number of conflicts of each candidate. 
for (IntervalPtrs::iterator i = active_.begin(); i != active_.end(); ++i) { @@ -865,16 +882,15 @@ void RALinScan::UpgradeRegister(unsigned Reg) { namespace { struct LISorter { bool operator()(LiveInterval* A, LiveInterval* B) { - return A->beginNumber() < B->beginNumber(); + return A->beginIndex() < B->beginIndex(); } }; } /// assignRegOrStackSlotAtInterval - assign a register if one is available, or /// spill. -void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) -{ - DOUT << "\tallocating current interval: "; +void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { + DEBUG(errs() << "\tallocating current interval: "); // This is an implicitly defined live interval, just assign any register. const TargetRegisterClass *RC = mri_->getRegClass(cur->reg); @@ -882,7 +898,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) unsigned physReg = vrm_->getRegAllocPref(cur->reg); if (!physReg) physReg = *RC->allocation_order_begin(*mf_); - DOUT << tri_->getName(physReg) << '\n'; + DEBUG(errs() << tri_->getName(physReg) << '\n'); // Note the register is not really in use. vrm_->assignVirt2Phys(cur->reg, physReg); return; @@ -891,7 +907,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) backUpRegUses(); std::vector<std::pair<unsigned, float> > SpillWeightsToAdd; - unsigned StartPosition = cur->beginNumber(); + LiveIndex StartPosition = cur->beginIndex(); const TargetRegisterClass *RCLeader = RelatedRegClasses.getLeaderValue(RC); // If start of this live interval is defined by a move instruction and its @@ -901,7 +917,8 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) // one, e.g. X86::mov32to32_. These move instructions are not coalescable. 
if (!vrm_->getRegAllocPref(cur->reg) && cur->hasAtLeastOneValue()) { VNInfo *vni = cur->begin()->valno; - if (vni->def && !vni->isUnused() && vni->isDefAccurate()) { + if ((vni->def != LiveIndex()) && !vni->isUnused() && + vni->isDefAccurate()) { MachineInstr *CopyMI = li_->getInstructionFromIndex(vni->def); unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; if (CopyMI && @@ -963,7 +980,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) // Okay, this reg is on the fixed list. Check to see if we actually // conflict. LiveInterval *I = IP.first; - if (I->endNumber() > StartPosition) { + if (I->endIndex() > StartPosition) { LiveInterval::iterator II = I->advanceTo(IP.second, StartPosition); IP.second = II; if (II != I->begin() && II->start > StartPosition) @@ -988,7 +1005,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) const TargetRegisterClass *RegRC = OneClassForEachPhysReg[I->reg]; if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader && - I->endNumber() > StartPosition) { + I->endIndex() > StartPosition) { LiveInterval::iterator II = I->advanceTo(IP.second, StartPosition); IP.second = II; if (II != I->begin() && II->start > StartPosition) @@ -1015,7 +1032,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) // the free physical register and add this interval to the active // list. if (physReg) { - DOUT << tri_->getName(physReg) << '\n'; + DEBUG(errs() << tri_->getName(physReg) << '\n'); vrm_->assignVirt2Phys(cur->reg, physReg); addRegUse(physReg); active_.push_back(std::make_pair(cur, cur->begin())); @@ -1031,7 +1048,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) } return; } - DOUT << "no free registers\n"; + DEBUG(errs() << "no free registers\n"); // Compile the spill weights into an array that is better for scanning. 
std::vector<float> SpillWeights(tri_->getNumRegs(), 0.0f); @@ -1049,7 +1066,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) updateSpillWeights(SpillWeights, reg, i->first->weight, RC); } - DOUT << "\tassigning stack slot at interval "<< *cur << ":\n"; + DEBUG(errs() << "\tassigning stack slot at interval "<< *cur << ":\n"); // Find a register to spill. float minWeight = HUGE_VALF; @@ -1102,8 +1119,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) DowngradedRegs.clear(); assignRegOrStackSlotAtInterval(cur); } else { - cerr << "Ran out of registers during register allocation!\n"; - exit(1); + llvm_report_error("Ran out of registers during register allocation!"); } return; } @@ -1117,16 +1133,19 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) --LastCandidate; } - DOUT << "\t\tregister(s) with min weight(s): "; - DEBUG(for (unsigned i = 0; i != LastCandidate; ++i) - DOUT << tri_->getName(RegsWeights[i].first) - << " (" << RegsWeights[i].second << ")\n"); + DEBUG({ + errs() << "\t\tregister(s) with min weight(s): "; + + for (unsigned i = 0; i != LastCandidate; ++i) + errs() << tri_->getName(RegsWeights[i].first) + << " (" << RegsWeights[i].second << ")\n"; + }); // If the current has the minimum weight, we need to spill it and // add any added intervals back to unhandled, and restart // linearscan. 
if (cur->weight != HUGE_VALF && cur->weight <= minWeight) { - DOUT << "\t\t\tspilling(c): " << *cur << '\n'; + DEBUG(errs() << "\t\t\tspilling(c): " << *cur << '\n'); SmallVector<LiveInterval*, 8> spillIs; std::vector<LiveInterval*> added; @@ -1154,14 +1173,14 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) LiveInterval *ReloadLi = added[i]; if (ReloadLi->weight == HUGE_VALF && li_->getApproximateInstructionCount(*ReloadLi) == 0) { - unsigned ReloadIdx = ReloadLi->beginNumber(); + LiveIndex ReloadIdx = ReloadLi->beginIndex(); MachineBasicBlock *ReloadMBB = li_->getMBBFromIndex(ReloadIdx); int ReloadSS = vrm_->getStackSlot(ReloadLi->reg); if (LastReloadMBB == ReloadMBB && LastReloadSS == ReloadSS) { // Last reload of same SS is in the same MBB. We want to try to // allocate both reloads the same register and make sure the reg // isn't clobbered in between if at all possible. - assert(LastReload->beginNumber() < ReloadIdx); + assert(LastReload->beginIndex() < ReloadIdx); NextReloadMap.insert(std::make_pair(LastReload->reg, ReloadLi->reg)); } LastReloadMBB = ReloadMBB; @@ -1206,12 +1225,11 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) // mark our rollback point. std::vector<LiveInterval*> added; while (!spillIs.empty()) { - bool epicFail = false; LiveInterval *sli = spillIs.back(); spillIs.pop_back(); - DOUT << "\t\t\tspilling(a): " << *sli << '\n'; + DEBUG(errs() << "\t\t\tspilling(a): " << *sli << '\n'); earliestStartInterval = - (earliestStartInterval->beginNumber() < sli->beginNumber()) ? + (earliestStartInterval->beginIndex() < sli->beginIndex()) ? 
earliestStartInterval : sli; std::vector<LiveInterval*> newIs; @@ -1223,15 +1241,11 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) addStackInterval(sli, ls_, li_, mri_, *vrm_); std::copy(newIs.begin(), newIs.end(), std::back_inserter(added)); spilled.insert(sli->reg); - - if (epicFail) { - //abort(); - } } - unsigned earliestStart = earliestStartInterval->beginNumber(); + LiveIndex earliestStart = earliestStartInterval->beginIndex(); - DOUT << "\t\trolling back to: " << earliestStart << '\n'; + DEBUG(errs() << "\t\trolling back to: " << earliestStart << '\n'); // Scan handled in reverse order up to the earliest start of a // spilled live interval and undo each one, restoring the state of @@ -1239,9 +1253,9 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) while (!handled_.empty()) { LiveInterval* i = handled_.back(); // If this interval starts before t we are done. - if (i->beginNumber() < earliestStart) + if (i->beginIndex() < earliestStart) break; - DOUT << "\t\t\tundo changes for: " << *i << '\n'; + DEBUG(errs() << "\t\t\tundo changes for: " << *i << '\n'); handled_.pop_back(); // When undoing a live interval allocation we must know if it is active or @@ -1290,8 +1304,8 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) for (unsigned i = 0, e = handled_.size(); i != e; ++i) { LiveInterval *HI = handled_[i]; if (!HI->expiredAt(earliestStart) && - HI->expiredAt(cur->beginNumber())) { - DOUT << "\t\t\tundo changes for: " << *HI << '\n'; + HI->expiredAt(cur->beginIndex())) { + DEBUG(errs() << "\t\t\tundo changes for: " << *HI << '\n'); active_.push_back(std::make_pair(HI, HI->begin())); assert(!TargetRegisterInfo::isPhysicalRegister(HI->reg)); addRegUse(vrm_->getPhys(HI->reg)); @@ -1310,14 +1324,14 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) LiveInterval *ReloadLi = added[i]; if (ReloadLi->weight == HUGE_VALF && li_->getApproximateInstructionCount(*ReloadLi) == 0) { - unsigned 
ReloadIdx = ReloadLi->beginNumber(); + LiveIndex ReloadIdx = ReloadLi->beginIndex(); MachineBasicBlock *ReloadMBB = li_->getMBBFromIndex(ReloadIdx); int ReloadSS = vrm_->getStackSlot(ReloadLi->reg); if (LastReloadMBB == ReloadMBB && LastReloadSS == ReloadSS) { // Last reload of same SS is in the same MBB. We want to try to // allocate both reloads the same register and make sure the reg // isn't clobbered in between if at all possible. - assert(LastReload->beginNumber() < ReloadIdx); + assert(LastReload->beginIndex() < ReloadIdx); NextReloadMap.insert(std::make_pair(LastReload->reg, ReloadLi->reg)); } LastReloadMBB = ReloadMBB; @@ -1420,7 +1434,7 @@ unsigned RALinScan::getFreePhysReg(LiveInterval *cur) { // available first. unsigned Preference = vrm_->getRegAllocPref(cur->reg); if (Preference) { - DOUT << "(preferred: " << tri_->getName(Preference) << ") "; + DEBUG(errs() << "(preferred: " << tri_->getName(Preference) << ") "); if (isRegAvail(Preference) && RC->contains(Preference)) return Preference; diff --git a/lib/CodeGen/RegAllocLocal.cpp b/lib/CodeGen/RegAllocLocal.cpp index e1cc20c..6caa2d3 100644 --- a/lib/CodeGen/RegAllocLocal.cpp +++ b/lib/CodeGen/RegAllocLocal.cpp @@ -25,6 +25,8 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/IndexedMap.h" #include "llvm/ADT/SmallSet.h" @@ -151,6 +153,7 @@ namespace { } virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); AU.addRequiredID(PHIEliminationID); AU.addRequiredID(TwoAddressInstructionPassID); MachineFunctionPass::getAnalysisUsage(AU); @@ -291,11 +294,11 @@ void RALocal::spillVirtReg(MachineBasicBlock &MBB, assert(VirtReg && "Spilling a physical register is illegal!" 
" Must not have appropriate kill for the register or use exists beyond" " the intended one."); - DOUT << " Spilling register " << TRI->getName(PhysReg) - << " containing %reg" << VirtReg; + DEBUG(errs() << " Spilling register " << TRI->getName(PhysReg) + << " containing %reg" << VirtReg); if (!isVirtRegModified(VirtReg)) { - DOUT << " which has not been modified, so no store necessary!"; + DEBUG(errs() << " which has not been modified, so no store necessary!"); std::pair<MachineInstr*, unsigned> &LastUse = getVirtRegLastUse(VirtReg); if (LastUse.first) LastUse.first->getOperand(LastUse.second).setIsKill(); @@ -305,7 +308,7 @@ void RALocal::spillVirtReg(MachineBasicBlock &MBB, // modified. const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg); int FrameIndex = getStackSpaceFor(VirtReg, RC); - DOUT << " to stack slot #" << FrameIndex; + DEBUG(errs() << " to stack slot #" << FrameIndex); // If the instruction reads the register that's spilled, (e.g. this can // happen if it is a move to a physical register), then the spill // instruction is not a kill. 
@@ -316,7 +319,7 @@ void RALocal::spillVirtReg(MachineBasicBlock &MBB, getVirt2PhysRegMapSlot(VirtReg) = 0; // VirtReg no longer available - DOUT << "\n"; + DEBUG(errs() << '\n'); removePhysReg(PhysReg); } @@ -505,8 +508,8 @@ MachineInstr *RALocal::reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI, markVirtRegModified(VirtReg, false); // Note that this reg was just reloaded - DOUT << " Reloading %reg" << VirtReg << " into " - << TRI->getName(PhysReg) << "\n"; + DEBUG(errs() << " Reloading %reg" << VirtReg << " into " + << TRI->getName(PhysReg) << "\n"); // Add move instruction(s) TII->loadRegFromStackSlot(MBB, MI, PhysReg, FrameIndex, RC); @@ -517,24 +520,28 @@ MachineInstr *RALocal::reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI, getVirtRegLastUse(VirtReg) = std::make_pair(MI, OpNum); if (!ReloadedRegs.insert(PhysReg)) { - cerr << "Ran out of registers during register allocation!\n"; + std::string msg; + raw_string_ostream Msg(msg); + Msg << "Ran out of registers during register allocation!"; if (MI->getOpcode() == TargetInstrInfo::INLINEASM) { - cerr << "Please check your inline asm statement for invalid " + Msg << "\nPlease check your inline asm statement for invalid " << "constraints:\n"; - MI->print(cerr.stream(), TM); + MI->print(Msg, TM); } - exit(1); + llvm_report_error(Msg.str()); } for (const unsigned *SubRegs = TRI->getSubRegisters(PhysReg); *SubRegs; ++SubRegs) { if (!ReloadedRegs.insert(*SubRegs)) { - cerr << "Ran out of registers during register allocation!\n"; + std::string msg; + raw_string_ostream Msg(msg); + Msg << "Ran out of registers during register allocation!"; if (MI->getOpcode() == TargetInstrInfo::INLINEASM) { - cerr << "Please check your inline asm statement for invalid " + Msg << "\nPlease check your inline asm statement for invalid " << "constraints:\n"; - MI->print(cerr.stream(), TM); + MI->print(Msg, TM); } - exit(1); + llvm_report_error(Msg.str()); } } @@ -707,8 +714,11 @@ void 
RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) { // loop over each instruction MachineBasicBlock::iterator MII = MBB.begin(); - DEBUG(const BasicBlock *LBB = MBB.getBasicBlock(); - if (LBB) DOUT << "\nStarting RegAlloc of BB: " << LBB->getName()); + DEBUG({ + const BasicBlock *LBB = MBB.getBasicBlock(); + if (LBB) + errs() << "\nStarting RegAlloc of BB: " << LBB->getName(); + }); // Add live-in registers as active. for (MachineBasicBlock::livein_iterator I = MBB.livein_begin(), @@ -733,13 +743,15 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) { while (MII != MBB.end()) { MachineInstr *MI = MII++; const TargetInstrDesc &TID = MI->getDesc(); - DEBUG(DOUT << "\nStarting RegAlloc of: " << *MI; - DOUT << " Regs have values: "; - for (unsigned i = 0; i != TRI->getNumRegs(); ++i) - if (PhysRegsUsed[i] != -1 && PhysRegsUsed[i] != -2) - DOUT << "[" << TRI->getName(i) - << ",%reg" << PhysRegsUsed[i] << "] "; - DOUT << "\n"); + DEBUG({ + errs() << "\nStarting RegAlloc of: " << *MI; + errs() << " Regs have values: "; + for (unsigned i = 0; i != TRI->getNumRegs(); ++i) + if (PhysRegsUsed[i] != -1 && PhysRegsUsed[i] != -2) + errs() << "[" << TRI->getName(i) + << ",%reg" << PhysRegsUsed[i] << "] "; + errs() << '\n'; + }); // Loop over the implicit uses, making sure that they are at the head of the // use order list, so they don't get reallocated. 
@@ -783,8 +795,8 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) { markVirtRegModified(DestVirtReg); getVirtRegLastUse(DestVirtReg) = std::make_pair((MachineInstr*)0, 0); - DOUT << " Assigning " << TRI->getName(DestPhysReg) - << " to %reg" << DestVirtReg << "\n"; + DEBUG(errs() << " Assigning " << TRI->getName(DestPhysReg) + << " to %reg" << DestVirtReg << "\n"); MO.setReg(DestPhysReg); // Assign the earlyclobber register } else { unsigned Reg = MO.getReg(); @@ -849,15 +861,15 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) { } if (PhysReg) { - DOUT << " Last use of " << TRI->getName(PhysReg) - << "[%reg" << VirtReg <<"], removing it from live set\n"; + DEBUG(errs() << " Last use of " << TRI->getName(PhysReg) + << "[%reg" << VirtReg <<"], removing it from live set\n"); removePhysReg(PhysReg); for (const unsigned *SubRegs = TRI->getSubRegisters(PhysReg); *SubRegs; ++SubRegs) { if (PhysRegsUsed[*SubRegs] != -2) { - DOUT << " Last use of " - << TRI->getName(*SubRegs) - << "[%reg" << VirtReg <<"], removing it from live set\n"; + DEBUG(errs() << " Last use of " + << TRI->getName(*SubRegs) << "[%reg" << VirtReg + <<"], removing it from live set\n"); removePhysReg(*SubRegs); } } @@ -942,8 +954,8 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) { MF->getRegInfo().setPhysRegUsed(DestPhysReg); markVirtRegModified(DestVirtReg); getVirtRegLastUse(DestVirtReg) = std::make_pair((MachineInstr*)0, 0); - DOUT << " Assigning " << TRI->getName(DestPhysReg) - << " to %reg" << DestVirtReg << "\n"; + DEBUG(errs() << " Assigning " << TRI->getName(DestPhysReg) + << " to %reg" << DestVirtReg << "\n"); MO.setReg(DestPhysReg); // Assign the output register } } @@ -965,16 +977,16 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) { } if (PhysReg) { - DOUT << " Register " << TRI->getName(PhysReg) - << " [%reg" << VirtReg - << "] is never used, removing it from live set\n"; + DEBUG(errs() << " Register " << TRI->getName(PhysReg) + << " [%reg" << 
VirtReg + << "] is never used, removing it from live set\n"); removePhysReg(PhysReg); for (const unsigned *AliasSet = TRI->getAliasSet(PhysReg); *AliasSet; ++AliasSet) { if (PhysRegsUsed[*AliasSet] != -2) { - DOUT << " Register " << TRI->getName(*AliasSet) - << " [%reg" << *AliasSet - << "] is never used, removing it from live set\n"; + DEBUG(errs() << " Register " << TRI->getName(*AliasSet) + << " [%reg" << *AliasSet + << "] is never used, removing it from live set\n"); removePhysReg(*AliasSet); } } @@ -1022,7 +1034,7 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) { /// runOnMachineFunction - Register allocate the whole function /// bool RALocal::runOnMachineFunction(MachineFunction &Fn) { - DOUT << "Machine Function " << "\n"; + DEBUG(errs() << "Machine Function\n"); MF = &Fn; TM = &Fn.getTarget(); TRI = TM->getRegisterInfo(); diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp index 89e2c59..bee5d93 100644 --- a/lib/CodeGen/RegAllocPBQP.cpp +++ b/lib/CodeGen/RegAllocPBQP.cpp @@ -31,7 +31,9 @@ #define DEBUG_TYPE "regalloc" -#include "PBQP.h" +#include "PBQP/HeuristicSolver.h" +#include "PBQP/SimpleGraph.h" +#include "PBQP/Heuristics/Briggs.h" #include "VirtRegMap.h" #include "VirtRegRewriter.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" @@ -42,6 +44,7 @@ #include "llvm/CodeGen/RegAllocRegistry.h" #include "llvm/CodeGen/RegisterCoalescer.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include <limits> @@ -53,32 +56,38 @@ using namespace llvm; static RegisterRegAlloc -registerPBQPRepAlloc("pbqp", "PBQP register allocator", - createPBQPRegisterAllocator); +registerPBQPRepAlloc("pbqp", "PBQP register allocator.", + llvm::createPBQPRegisterAllocator); + +static cl::opt<bool> +pbqpCoalescing("pbqp-coalescing", + cl::desc("Attempt coalescing during PBQP register allocation."), + cl::init(false), cl::Hidden); namespace { - //! 
- //! PBQP based allocators solve the register allocation problem by mapping - //! register allocation problems to Partitioned Boolean Quadratic - //! Programming problems. + /// + /// PBQP based allocators solve the register allocation problem by mapping + /// register allocation problems to Partitioned Boolean Quadratic + /// Programming problems. class VISIBILITY_HIDDEN PBQPRegAlloc : public MachineFunctionPass { public: static char ID; - //! Construct a PBQP register allocator. - PBQPRegAlloc() : MachineFunctionPass((intptr_t)&ID) {} + /// Construct a PBQP register allocator. + PBQPRegAlloc() : MachineFunctionPass(&ID) {} - //! Return the pass name. - virtual const char* getPassName() const throw() { + /// Return the pass name. + virtual const char* getPassName() const { return "PBQP Register Allocator"; } - //! PBQP analysis usage. + /// PBQP analysis usage. virtual void getAnalysisUsage(AnalysisUsage &au) const { au.addRequired<LiveIntervals>(); - au.addRequiredTransitive<RegisterCoalescer>(); + //au.addRequiredID(SplitCriticalEdgesID); + au.addRequired<RegisterCoalescer>(); au.addRequired<LiveStacks>(); au.addPreserved<LiveStacks>(); au.addRequired<MachineLoopInfo>(); @@ -87,7 +96,7 @@ namespace { MachineFunctionPass::getAnalysisUsage(au); } - //! Perform register allocation + /// Perform register allocation virtual bool runOnMachineFunction(MachineFunction &MF); private: @@ -97,7 +106,7 @@ namespace { typedef std::vector<AllowedSet> AllowedSetMap; typedef std::set<unsigned> RegSet; typedef std::pair<unsigned, unsigned> RegPair; - typedef std::map<RegPair, PBQPNum> CoalesceMap; + typedef std::map<RegPair, PBQP::PBQPNum> CoalesceMap; typedef std::set<LiveInterval*> LiveIntervalSet; @@ -119,60 +128,60 @@ namespace { emptyVRegIntervals; - //! Builds a PBQP cost vector. + /// Builds a PBQP cost vector. 
template <typename RegContainer> - PBQPVector* buildCostVector(unsigned vReg, - const RegContainer &allowed, - const CoalesceMap &cealesces, - PBQPNum spillCost) const; - - //! \brief Builds a PBQP interference matrix. - //! - //! @return Either a pointer to a non-zero PBQP matrix representing the - //! allocation option costs, or a null pointer for a zero matrix. - //! - //! Expects allowed sets for two interfering LiveIntervals. These allowed - //! sets should contain only allocable registers from the LiveInterval's - //! register class, with any interfering pre-colored registers removed. + PBQP::Vector buildCostVector(unsigned vReg, + const RegContainer &allowed, + const CoalesceMap &cealesces, + PBQP::PBQPNum spillCost) const; + + /// \brief Builds a PBQP interference matrix. + /// + /// @return Either a pointer to a non-zero PBQP matrix representing the + /// allocation option costs, or a null pointer for a zero matrix. + /// + /// Expects allowed sets for two interfering LiveIntervals. These allowed + /// sets should contain only allocable registers from the LiveInterval's + /// register class, with any interfering pre-colored registers removed. template <typename RegContainer> - PBQPMatrix* buildInterferenceMatrix(const RegContainer &allowed1, - const RegContainer &allowed2) const; - - //! - //! Expects allowed sets for two potentially coalescable LiveIntervals, - //! and an estimated benefit due to coalescing. The allowed sets should - //! contain only allocable registers from the LiveInterval's register - //! classes, with any interfering pre-colored registers removed. + PBQP::Matrix* buildInterferenceMatrix(const RegContainer &allowed1, + const RegContainer &allowed2) const; + + /// + /// Expects allowed sets for two potentially coalescable LiveIntervals, + /// and an estimated benefit due to coalescing. 
The allowed sets should + /// contain only allocable registers from the LiveInterval's register + /// classes, with any interfering pre-colored registers removed. template <typename RegContainer> - PBQPMatrix* buildCoalescingMatrix(const RegContainer &allowed1, - const RegContainer &allowed2, - PBQPNum cBenefit) const; - - //! \brief Finds coalescing opportunities and returns them as a map. - //! - //! Any entries in the map are guaranteed coalescable, even if their - //! corresponding live intervals overlap. + PBQP::Matrix* buildCoalescingMatrix(const RegContainer &allowed1, + const RegContainer &allowed2, + PBQP::PBQPNum cBenefit) const; + + /// \brief Finds coalescing opportunities and returns them as a map. + /// + /// Any entries in the map are guaranteed coalescable, even if their + /// corresponding live intervals overlap. CoalesceMap findCoalesces(); - //! \brief Finds the initial set of vreg intervals to allocate. + /// \brief Finds the initial set of vreg intervals to allocate. void findVRegIntervalsToAlloc(); - //! \brief Constructs a PBQP problem representation of the register - //! allocation problem for this function. - //! - //! @return a PBQP solver object for the register allocation problem. - pbqp* constructPBQPProblem(); + /// \brief Constructs a PBQP problem representation of the register + /// allocation problem for this function. + /// + /// @return a PBQP solver object for the register allocation problem. + PBQP::SimpleGraph constructPBQPProblem(); - //! \brief Adds a stack interval if the given live interval has been - //! spilled. Used to support stack slot coloring. + /// \brief Adds a stack interval if the given live interval has been + /// spilled. Used to support stack slot coloring. void addStackInterval(const LiveInterval *spilled,MachineRegisterInfo* mri); - //! \brief Given a solved PBQP problem maps this solution back to a register - //! assignment. 
- bool mapPBQPToRegAlloc(pbqp *problem); + /// \brief Given a solved PBQP problem maps this solution back to a register + /// assignment. + bool mapPBQPToRegAlloc(const PBQP::Solution &solution); - //! \brief Postprocessing before final spilling. Sets basic block "live in" - //! variables. + /// \brief Postprocessing before final spilling. Sets basic block "live in" + /// variables. void finalizeAlloc() const; }; @@ -182,17 +191,17 @@ namespace { template <typename RegContainer> -PBQPVector* PBQPRegAlloc::buildCostVector(unsigned vReg, - const RegContainer &allowed, - const CoalesceMap &coalesces, - PBQPNum spillCost) const { +PBQP::Vector PBQPRegAlloc::buildCostVector(unsigned vReg, + const RegContainer &allowed, + const CoalesceMap &coalesces, + PBQP::PBQPNum spillCost) const { typedef typename RegContainer::const_iterator AllowedItr; // Allocate vector. Additional element (0th) used for spill option - PBQPVector *v = new PBQPVector(allowed.size() + 1); + PBQP::Vector v(allowed.size() + 1, 0); - (*v)[0] = spillCost; + v[0] = spillCost; // Iterate over the allowed registers inserting coalesce benefits if there // are any. @@ -210,14 +219,14 @@ PBQPVector* PBQPRegAlloc::buildCostVector(unsigned vReg, continue; // We have a coalesce - insert the benefit. - (*v)[ai + 1] = -cmItr->second; + v[ai + 1] = -cmItr->second; } return v; } template <typename RegContainer> -PBQPMatrix* PBQPRegAlloc::buildInterferenceMatrix( +PBQP::Matrix* PBQPRegAlloc::buildInterferenceMatrix( const RegContainer &allowed1, const RegContainer &allowed2) const { typedef typename RegContainer::const_iterator RegContainerIterator; @@ -230,7 +239,8 @@ PBQPMatrix* PBQPRegAlloc::buildInterferenceMatrix( // that the spill option (element 0,0) has zero cost, since we can allocate // both intervals to memory safely (the cost for each individual allocation // to memory is accounted for by the cost vectors for each live interval). 
- PBQPMatrix *m = new PBQPMatrix(allowed1.size() + 1, allowed2.size() + 1); + PBQP::Matrix *m = + new PBQP::Matrix(allowed1.size() + 1, allowed2.size() + 1, 0); // Assume this is a zero matrix until proven otherwise. Zero matrices occur // between interfering live ranges with non-overlapping register sets (e.g. @@ -259,8 +269,8 @@ PBQPMatrix* PBQPRegAlloc::buildInterferenceMatrix( unsigned reg2 = *a2Itr; // If the row/column regs are identical or alias insert an infinity. - if ((reg1 == reg2) || tri->areAliases(reg1, reg2)) { - (*m)[ri][ci] = std::numeric_limits<PBQPNum>::infinity(); + if (tri->regsOverlap(reg1, reg2)) { + (*m)[ri][ci] = std::numeric_limits<PBQP::PBQPNum>::infinity(); isZeroMatrix = false; } @@ -282,9 +292,9 @@ PBQPMatrix* PBQPRegAlloc::buildInterferenceMatrix( } template <typename RegContainer> -PBQPMatrix* PBQPRegAlloc::buildCoalescingMatrix( +PBQP::Matrix* PBQPRegAlloc::buildCoalescingMatrix( const RegContainer &allowed1, const RegContainer &allowed2, - PBQPNum cBenefit) const { + PBQP::PBQPNum cBenefit) const { typedef typename RegContainer::const_iterator RegContainerIterator; @@ -293,7 +303,8 @@ PBQPMatrix* PBQPRegAlloc::buildCoalescingMatrix( // for the LiveIntervals which are (potentially) to be coalesced. The amount // -cBenefit will be placed in any element representing the same register // for both intervals. - PBQPMatrix *m = new PBQPMatrix(allowed1.size() + 1, allowed2.size() + 1); + PBQP::Matrix *m = + new PBQP::Matrix(allowed1.size() + 1, allowed2.size() + 1, 0); // Reset costs to zero. m->reset(0); @@ -442,7 +453,7 @@ PBQPRegAlloc::CoalesceMap PBQPRegAlloc::findCoalesces() { vniItr != vniEnd; ++vniItr) { // We want to make sure we skip the copy instruction itself. 
- if ((*vniItr)->copy == instr) + if ((*vniItr)->getCopy() == instr) continue; if (srcLI->liveAt((*vniItr)->def)) { @@ -495,10 +506,11 @@ void PBQPRegAlloc::findVRegIntervalsToAlloc() { } } -pbqp* PBQPRegAlloc::constructPBQPProblem() { +PBQP::SimpleGraph PBQPRegAlloc::constructPBQPProblem() { typedef std::vector<const LiveInterval*> LIVector; typedef std::vector<unsigned> RegVector; + typedef std::vector<PBQP::SimpleGraph::NodeIterator> NodeVector; // This will store the physical intervals for easy reference. LIVector physIntervals; @@ -530,10 +542,15 @@ pbqp* PBQPRegAlloc::constructPBQPProblem() { } // Get the set of potential coalesces. - CoalesceMap coalesces(findCoalesces()); + CoalesceMap coalesces; + + if (pbqpCoalescing) { + coalesces = findCoalesces(); + } // Construct a PBQP solver for this problem - pbqp *solver = alloc_pbqp(vregIntervalsToAlloc.size()); + PBQP::SimpleGraph problem; + NodeVector problemNodes(vregIntervalsToAlloc.size()); // Resize allowedSets container appropriately. allowedSets.resize(vregIntervalsToAlloc.size()); @@ -594,13 +611,13 @@ pbqp* PBQPRegAlloc::constructPBQPProblem() { // Set the spill cost to the interval weight, or epsilon if the // interval weight is zero - PBQPNum spillCost = (li->weight != 0.0) ? - li->weight : std::numeric_limits<PBQPNum>::min(); + PBQP::PBQPNum spillCost = (li->weight != 0.0) ? + li->weight : std::numeric_limits<PBQP::PBQPNum>::min(); // Build a cost vector for this interval. 
- add_pbqp_nodecosts(solver, node, - buildCostVector(li->reg, allowedSets[node], coalesces, - spillCost)); + problemNodes[node] = + problem.addNode( + buildCostVector(li->reg, allowedSets[node], coalesces, spillCost)); } @@ -616,7 +633,7 @@ pbqp* PBQPRegAlloc::constructPBQPProblem() { CoalesceMap::const_iterator cmItr = coalesces.find(RegPair(li->reg, li2->reg)); - PBQPMatrix *m = 0; + PBQP::Matrix *m = 0; if (cmItr != coalesces.end()) { m = buildCoalescingMatrix(allowedSets[node1], allowedSets[node2], @@ -627,14 +644,29 @@ pbqp* PBQPRegAlloc::constructPBQPProblem() { } if (m != 0) { - add_pbqp_edgecosts(solver, node1, node2, m); + problem.addEdge(problemNodes[node1], + problemNodes[node2], + *m); + delete m; } } } + problem.assignNodeIDs(); + + assert(problem.getNumNodes() == allowedSets.size()); + for (unsigned i = 0; i < allowedSets.size(); ++i) { + assert(problem.getNodeItr(i) == problemNodes[i]); + } +/* + std::cerr << "Allocating for " << problem.getNumNodes() << " nodes, " + << problem.getNumEdges() << " edges.\n"; + + problem.printDot(std::cerr); +*/ // We're done, PBQP problem constructed - return it. 
- return solver; + return problem; } void PBQPRegAlloc::addStackInterval(const LiveInterval *spilled, @@ -651,14 +683,14 @@ void PBQPRegAlloc::addStackInterval(const LiveInterval *spilled, if (stackInterval.getNumValNums() != 0) vni = stackInterval.getValNumInfo(0); else - vni = stackInterval.getNextValue(0, 0, false, lss->getVNInfoAllocator()); + vni = stackInterval.getNextValue( + LiveIndex(), 0, false, lss->getVNInfoAllocator()); LiveInterval &rhsInterval = lis->getInterval(spilled->reg); stackInterval.MergeRangesInAsValue(rhsInterval, vni); } -bool PBQPRegAlloc::mapPBQPToRegAlloc(pbqp *problem) { - +bool PBQPRegAlloc::mapPBQPToRegAlloc(const PBQP::Solution &solution) { // Set to true if we have any spills bool anotherRoundNeeded = false; @@ -668,14 +700,16 @@ bool PBQPRegAlloc::mapPBQPToRegAlloc(pbqp *problem) { // Iterate over the nodes mapping the PBQP solution to a register assignment. for (unsigned node = 0; node < node2LI.size(); ++node) { unsigned virtReg = node2LI[node]->reg, - allocSelection = get_pbqp_solution(problem, node); + allocSelection = solution.getSelection(node); + // If the PBQP solution is non-zero it's a physical register... if (allocSelection != 0) { // Get the physical reg, subtracting 1 to account for the spill option. unsigned physReg = allowedSets[node][allocSelection - 1]; - DOUT << "VREG " << virtReg << " -> " << tri->getName(physReg) << "\n"; + DEBUG(errs() << "VREG " << virtReg << " -> " + << tri->getName(physReg) << "\n"); assert(physReg != 0); @@ -697,8 +731,9 @@ bool PBQPRegAlloc::mapPBQPToRegAlloc(pbqp *problem) { lis->addIntervalsForSpills(*spillInterval, spillIs, loopInfo, *vrm); addStackInterval(spillInterval, mri); - DOUT << "VREG " << virtReg << " -> SPILLED (Cost: " - << oldSpillWeight << ", New vregs: "; + (void) oldSpillWeight; + DEBUG(errs() << "VREG " << virtReg << " -> SPILLED (Cost: " + << oldSpillWeight << ", New vregs: "); // Copy any newly inserted live intervals into the list of regs to // allocate. 
@@ -708,12 +743,12 @@ bool PBQPRegAlloc::mapPBQPToRegAlloc(pbqp *problem) { assert(!(*itr)->empty() && "Empty spill range."); - DOUT << (*itr)->reg << " "; + DEBUG(errs() << (*itr)->reg << " "); vregIntervalsToAlloc.insert(*itr); } - DOUT << ")\n"; + DEBUG(errs() << ")\n"); // We need another round if spill intervals were added. anotherRoundNeeded |= !newSpills.empty(); @@ -734,6 +769,7 @@ void PBQPRegAlloc::finalizeAlloc() const { LiveInterval *li = *itr; unsigned physReg = vrm->getRegAllocPref(li->reg); + if (physReg == 0) { const TargetRegisterClass *liRC = mri->getRegClass(li->reg); physReg = *liRC->allocation_order_begin(*mf); @@ -764,8 +800,8 @@ void PBQPRegAlloc::finalizeAlloc() const { continue; } - // Ignore unallocated vregs: if (reg == 0) { + // Filter out zero regs - they're for intervals that were spilled. continue; } @@ -804,7 +840,7 @@ bool PBQPRegAlloc::runOnMachineFunction(MachineFunction &MF) { vrm = &getAnalysis<VirtRegMap>(); - DOUT << "PBQP Register Allocating for " << mf->getFunction()->getName() << "\n"; + DEBUG(errs() << "PBQP2 Register Allocating for " << mf->getFunction()->getName() << "\n"); // Allocator main loop: // @@ -829,15 +865,14 @@ bool PBQPRegAlloc::runOnMachineFunction(MachineFunction &MF) { unsigned round = 0; while (!pbqpAllocComplete) { - DOUT << " PBQP Regalloc round " << round << ":\n"; - - pbqp *problem = constructPBQPProblem(); - - solve_pbqp(problem); + DEBUG(errs() << " PBQP Regalloc round " << round << ":\n"); - pbqpAllocComplete = mapPBQPToRegAlloc(problem); + PBQP::SimpleGraph problem = constructPBQPProblem(); + PBQP::HeuristicSolver<PBQP::Heuristics::Briggs> solver; + problem.assignNodeIDs(); + PBQP::Solution solution = solver.solve(problem); - free_pbqp(problem); + pbqpAllocComplete = mapPBQPToRegAlloc(solution); ++round; } @@ -852,7 +887,7 @@ bool PBQPRegAlloc::runOnMachineFunction(MachineFunction &MF) { node2LI.clear(); allowedSets.clear(); - DOUT << "Post alloc VirtRegMap:\n" << *vrm << "\n"; + DEBUG(errs() << 
"Post alloc VirtRegMap:\n" << *vrm << "\n"); // Run rewriter std::auto_ptr<VirtRegRewriter> rewriter(createVirtRegRewriter()); diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp index d7fe7a2..5f1c4e2 100644 --- a/lib/CodeGen/RegisterScavenging.cpp +++ b/lib/CodeGen/RegisterScavenging.cpp @@ -16,46 +16,21 @@ #define DEBUG_TYPE "reg-scavenging" #include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" using namespace llvm; -/// RedefinesSuperRegPart - Return true if the specified register is redefining -/// part of a super-register. -static bool RedefinesSuperRegPart(const MachineInstr *MI, unsigned SubReg, - const TargetRegisterInfo *TRI) { - bool SeenSuperUse = false; - bool SeenSuperDef = false; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg() || MO.isUndef()) - continue; - if (TRI->isSuperRegister(SubReg, MO.getReg())) { - if (MO.isUse()) - SeenSuperUse = true; - else if (MO.isImplicit()) - SeenSuperDef = true; - } - } - - return SeenSuperDef && SeenSuperUse; -} - -static bool RedefinesSuperRegPart(const MachineInstr *MI, - const MachineOperand &MO, - const TargetRegisterInfo *TRI) { - assert(MO.isReg() && MO.isDef() && "Not a register def!"); - return RedefinesSuperRegPart(MI, MO.getReg(), TRI); -} - /// setUsed - Set the register and its sub-registers as being used. 
void RegScavenger::setUsed(unsigned Reg) { RegsAvailable.reset(Reg); @@ -65,14 +40,38 @@ void RegScavenger::setUsed(unsigned Reg) { RegsAvailable.reset(SubReg); } -/// setUnused - Set the register and its sub-registers as being unused. -void RegScavenger::setUnused(unsigned Reg, const MachineInstr *MI) { - RegsAvailable.set(Reg); +bool RegScavenger::isAliasUsed(unsigned Reg) const { + if (isUsed(Reg)) + return true; + for (const unsigned *R = TRI->getAliasSet(Reg); *R; ++R) + if (isUsed(*R)) + return true; + return false; +} - for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); - unsigned SubReg = *SubRegs; ++SubRegs) - if (!RedefinesSuperRegPart(MI, Reg, TRI)) - RegsAvailable.set(SubReg); +void RegScavenger::initRegState() { + ScavengedReg = 0; + ScavengedRC = NULL; + ScavengeRestore = NULL; + + // All registers started out unused. + RegsAvailable.set(); + + // Reserved registers are always used. + RegsAvailable ^= ReservedRegs; + + if (!MBB) + return; + + // Live-in registers are in use. + for (MachineBasicBlock::const_livein_iterator I = MBB->livein_begin(), + E = MBB->livein_end(); I != E; ++I) + setUsed(*I); + + // Pristine CSRs are also unavailable. + BitVector PR = MBB->getParent()->getFrameInfo()->getPristineRegs(MBB); + for (int I = PR.find_first(); I>0; I = PR.find_next(I)) + setUsed(I); } void RegScavenger::enterBasicBlock(MachineBasicBlock *mbb) { @@ -85,6 +84,7 @@ void RegScavenger::enterBasicBlock(MachineBasicBlock *mbb) { assert((NumPhysRegs == 0 || NumPhysRegs == TRI->getNumRegs()) && "Target changed?"); + // Self-initialize. if (!MBB) { NumPhysRegs = TRI->getNumRegs(); RegsAvailable.resize(NumPhysRegs); @@ -100,73 +100,26 @@ void RegScavenger::enterBasicBlock(MachineBasicBlock *mbb) { CalleeSavedRegs.set(CSRegs[i]); } - MBB = mbb; - ScavengedReg = 0; - ScavengedRC = NULL; - ScavengeRestore = NULL; - CurrDist = 0; - DistanceMap.clear(); - - // All registers started out unused. - RegsAvailable.set(); - - // Reserved registers are always used. 
- RegsAvailable ^= ReservedRegs; - - // Live-in registers are in use. - if (!MBB->livein_empty()) - for (MachineBasicBlock::const_livein_iterator I = MBB->livein_begin(), - E = MBB->livein_end(); I != E; ++I) - setUsed(*I); + // RS used within emit{Pro,Epi}logue() + if (mbb != MBB) { + MBB = mbb; + initRegState(); + } Tracking = false; } -void RegScavenger::restoreScavengedReg() { - TII->loadRegFromStackSlot(*MBB, MBBI, ScavengedReg, - ScavengingFrameIndex, ScavengedRC); - MachineBasicBlock::iterator II = prior(MBBI); - TRI->eliminateFrameIndex(II, 0, this); - setUsed(ScavengedReg); - ScavengedReg = 0; - ScavengedRC = NULL; +void RegScavenger::addRegWithSubRegs(BitVector &BV, unsigned Reg) { + BV.set(Reg); + for (const unsigned *R = TRI->getSubRegisters(Reg); *R; R++) + BV.set(*R); } -#ifndef NDEBUG -/// isLiveInButUnusedBefore - Return true if register is livein the MBB not -/// not used before it reaches the MI that defines register. -static bool isLiveInButUnusedBefore(unsigned Reg, MachineInstr *MI, - MachineBasicBlock *MBB, - const TargetRegisterInfo *TRI, - MachineRegisterInfo* MRI) { - // First check if register is livein. - bool isLiveIn = false; - for (MachineBasicBlock::const_livein_iterator I = MBB->livein_begin(), - E = MBB->livein_end(); I != E; ++I) - if (Reg == *I || TRI->isSuperRegister(Reg, *I)) { - isLiveIn = true; - break; - } - if (!isLiveIn) - return false; - - // Is there any use of it before the specified MI? 
- SmallPtrSet<MachineInstr*, 4> UsesInMBB; - for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg), - UE = MRI->use_end(); UI != UE; ++UI) { - MachineInstr *UseMI = &*UI; - if (UseMI->getParent() == MBB) - UsesInMBB.insert(UseMI); - } - if (UsesInMBB.empty()) - return true; - - for (MachineBasicBlock::iterator I = MBB->begin(), E = MI; I != E; ++I) - if (UsesInMBB.count(&*I)) - return false; - return true; +void RegScavenger::addRegWithAliases(BitVector &BV, unsigned Reg) { + BV.set(Reg); + for (const unsigned *R = TRI->getAliasSet(Reg); *R; R++) + BV.set(*R); } -#endif void RegScavenger::forward() { // Move ptr forward. @@ -179,7 +132,6 @@ void RegScavenger::forward() { } MachineInstr *MI = MBBI; - DistanceMap.insert(std::make_pair(MI, CurrDist++)); if (MI == ScavengeRestore) { ScavengedReg = 0; @@ -187,153 +139,63 @@ void RegScavenger::forward() { ScavengeRestore = NULL; } -#if 0 - if (MI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF) - return; -#endif - - // Separate register operands into 3 classes: uses, defs, earlyclobbers. - SmallVector<std::pair<const MachineOperand*,unsigned>, 4> UseMOs; - SmallVector<std::pair<const MachineOperand*,unsigned>, 4> DefMOs; - SmallVector<std::pair<const MachineOperand*,unsigned>, 4> EarlyClobberMOs; + // Find out which registers are early clobbered, killed, defined, and marked + // def-dead in this instruction. + BitVector EarlyClobberRegs(NumPhysRegs); + BitVector KillRegs(NumPhysRegs); + BitVector DefRegs(NumPhysRegs); + BitVector DeadRegs(NumPhysRegs); for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg() || MO.getReg() == 0 || MO.isUndef()) + if (!MO.isReg() || MO.isUndef()) continue; - if (MO.isUse()) - UseMOs.push_back(std::make_pair(&MO,i)); - else if (MO.isEarlyClobber()) - EarlyClobberMOs.push_back(std::make_pair(&MO,i)); - else - DefMOs.push_back(std::make_pair(&MO,i)); - } - - // Process uses first. 
- BitVector KillRegs(NumPhysRegs); - for (unsigned i = 0, e = UseMOs.size(); i != e; ++i) { - const MachineOperand MO = *UseMOs[i].first; unsigned Reg = MO.getReg(); - - assert(isUsed(Reg) && "Using an undefined register!"); - - if (MO.isKill() && !isReserved(Reg)) { - KillRegs.set(Reg); - - // Mark sub-registers as used. - for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); - unsigned SubReg = *SubRegs; ++SubRegs) - KillRegs.set(SubReg); - } - } - - // Change states of all registers after all the uses are processed to guard - // against multiple uses. - setUnused(KillRegs); - - // Process early clobber defs then process defs. We can have a early clobber - // that is dead, it should not conflict with a def that happens one "slot" - // (see InstrSlots in LiveIntervalAnalysis.h) later. - unsigned NumECs = EarlyClobberMOs.size(); - unsigned NumDefs = DefMOs.size(); - - for (unsigned i = 0, e = NumECs + NumDefs; i != e; ++i) { - const MachineOperand &MO = (i < NumECs) - ? *EarlyClobberMOs[i].first : *DefMOs[i-NumECs].first; - unsigned Idx = (i < NumECs) - ? EarlyClobberMOs[i].second : DefMOs[i-NumECs].second; - unsigned Reg = MO.getReg(); - if (MO.isUndef()) + if (!Reg || isReserved(Reg)) continue; - // If it's dead upon def, then it is now free. - if (MO.isDead()) { - setUnused(Reg, MI); - continue; - } - - // Skip two-address destination operand. - unsigned UseIdx; - if (MI->isRegTiedToUseOperand(Idx, &UseIdx) && - !MI->getOperand(UseIdx).isUndef()) { - assert(isUsed(Reg) && "Using an undefined register!"); - continue; + if (MO.isUse()) { + // Two-address operands implicitly kill. + if (MO.isKill() || MI->isRegTiedToDefOperand(i)) + addRegWithSubRegs(KillRegs, Reg); + } else { + assert(MO.isDef()); + if (MO.isDead()) + addRegWithSubRegs(DeadRegs, Reg); + else + addRegWithSubRegs(DefRegs, Reg); + if (MO.isEarlyClobber()) + addRegWithAliases(EarlyClobberRegs, Reg); } - - // Skip if this is merely redefining part of a super-register. 
- if (RedefinesSuperRegPart(MI, MO, TRI)) - continue; - - // Implicit def is allowed to "re-define" any register. Similarly, - // implicitly defined registers can be clobbered. - assert((isReserved(Reg) || isUnused(Reg) || - isLiveInButUnusedBefore(Reg, MI, MBB, TRI, MRI)) && - "Re-defining a live register!"); - setUsed(Reg); } -} - -void RegScavenger::backward() { - assert(Tracking && "Not tracking states!"); - assert(MBBI != MBB->begin() && "Already at start of basic block!"); - // Move ptr backward. - MBBI = prior(MBBI); - - MachineInstr *MI = MBBI; - DistanceMap.erase(MI); - --CurrDist; - // Separate register operands into 3 classes: uses, defs, earlyclobbers. - SmallVector<std::pair<const MachineOperand*,unsigned>, 4> UseMOs; - SmallVector<std::pair<const MachineOperand*,unsigned>, 4> DefMOs; - SmallVector<std::pair<const MachineOperand*,unsigned>, 4> EarlyClobberMOs; + // Verify uses and defs. for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg() || MO.getReg() == 0 || MO.isUndef()) - continue; - if (MO.isUse()) - UseMOs.push_back(std::make_pair(&MO,i)); - else if (MO.isEarlyClobber()) - EarlyClobberMOs.push_back(std::make_pair(&MO,i)); - else - DefMOs.push_back(std::make_pair(&MO,i)); - } - - - // Process defs first. - unsigned NumECs = EarlyClobberMOs.size(); - unsigned NumDefs = DefMOs.size(); - for (unsigned i = 0, e = NumECs + NumDefs; i != e; ++i) { - const MachineOperand &MO = (i < NumDefs) - ? *DefMOs[i].first : *EarlyClobberMOs[i-NumDefs].first; - unsigned Idx = (i < NumECs) - ? DefMOs[i].second : EarlyClobberMOs[i-NumDefs].second; - if (MO.isUndef()) - continue; - - // Skip two-address destination operand. 
- if (MI->isRegTiedToUseOperand(Idx)) + if (!MO.isReg() || MO.isUndef()) continue; - unsigned Reg = MO.getReg(); - assert(isUsed(Reg)); - if (!isReserved(Reg)) - setUnused(Reg, MI); + if (!Reg || isReserved(Reg)) + continue; + if (MO.isUse()) { + assert(isUsed(Reg) && "Using an undefined register!"); + assert((!EarlyClobberRegs.test(Reg) || MI->isRegTiedToDefOperand(i)) && + "Using an early clobbered register!"); + } else { + assert(MO.isDef()); +#if 0 + // FIXME: Enable this once we've figured out how to correctly transfer + // implicit kills during codegen passes like the coalescer. + assert((KillRegs.test(Reg) || isUnused(Reg) || + isLiveInButUnusedBefore(Reg, MI, MBB, TRI, MRI)) && + "Re-defining a live register!"); +#endif + } } - // Process uses. - BitVector UseRegs(NumPhysRegs); - for (unsigned i = 0, e = UseMOs.size(); i != e; ++i) { - const MachineOperand MO = *UseMOs[i].first; - unsigned Reg = MO.getReg(); - assert(isUnused(Reg) || isReserved(Reg)); - UseRegs.set(Reg); - - // Set the sub-registers as "used". - for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); - unsigned SubReg = *SubRegs; ++SubRegs) - UseRegs.set(SubReg); - } - setUsed(UseRegs); + // Commit the changes. + setUnused(KillRegs); + setUnused(DeadRegs); + setUsed(DefRegs); } void RegScavenger::getRegsUsed(BitVector &used, bool includeReserved) { @@ -351,129 +213,110 @@ static void CreateRegClassMask(const TargetRegisterClass *RC, BitVector &Mask) { Mask.set(*I); } -unsigned RegScavenger::FindUnusedReg(const TargetRegisterClass *RegClass, - const BitVector &Candidates) const { - // Mask off the registers which are not in the TargetRegisterClass. - BitVector RegsAvailableCopy(NumPhysRegs, false); - CreateRegClassMask(RegClass, RegsAvailableCopy); - RegsAvailableCopy &= RegsAvailable; - - // Restrict the search to candidates. - RegsAvailableCopy &= Candidates; - - // Returns the first unused (bit is set) register, or 0 is none is found. 
- int Reg = RegsAvailableCopy.find_first(); - return (Reg == -1) ? 0 : Reg; +unsigned RegScavenger::FindUnusedReg(const TargetRegisterClass *RC) const { + for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); + I != E; ++I) + if (!isAliasUsed(*I)) + return *I; + return 0; } -unsigned RegScavenger::FindUnusedReg(const TargetRegisterClass *RegClass, - bool ExCalleeSaved) const { - // Mask off the registers which are not in the TargetRegisterClass. - BitVector RegsAvailableCopy(NumPhysRegs, false); - CreateRegClassMask(RegClass, RegsAvailableCopy); - RegsAvailableCopy &= RegsAvailable; - - // If looking for a non-callee-saved register, mask off all the callee-saved - // registers. - if (ExCalleeSaved) - RegsAvailableCopy &= ~CalleeSavedRegs; - - // Returns the first unused (bit is set) register, or 0 is none is found. - int Reg = RegsAvailableCopy.find_first(); - return (Reg == -1) ? 0 : Reg; -} +/// findSurvivorReg - Return the candidate register that is unused for the +/// longest after MBBI. UseMI is set to the instruction where the search +/// stopped. +/// +/// No more than InstrLimit instructions are inspected. +/// +unsigned RegScavenger::findSurvivorReg(MachineBasicBlock::iterator MI, + BitVector &Candidates, + unsigned InstrLimit, + MachineBasicBlock::iterator &UseMI) { + int Survivor = Candidates.find_first(); + assert(Survivor > 0 && "No candidates for scavenging"); + + MachineBasicBlock::iterator ME = MBB->getFirstTerminator(); + assert(MI != ME && "MI already at terminator"); + + for (++MI; InstrLimit > 0 && MI != ME; ++MI, --InstrLimit) { + // Remove any candidates touched by instruction. 
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || MO.isUndef() || !MO.getReg() || + TargetRegisterInfo::isVirtualRegister(MO.getReg())) + continue; + Candidates.reset(MO.getReg()); + for (const unsigned *R = TRI->getAliasSet(MO.getReg()); *R; R++) + Candidates.reset(*R); + } -/// findFirstUse - Calculate the distance to the first use of the -/// specified register. -MachineInstr* -RegScavenger::findFirstUse(MachineBasicBlock *MBB, - MachineBasicBlock::iterator I, unsigned Reg, - unsigned &Dist) { - MachineInstr *UseMI = 0; - Dist = ~0U; - for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(Reg), - RE = MRI->reg_end(); RI != RE; ++RI) { - MachineInstr *UDMI = &*RI; - if (UDMI->getParent() != MBB) + // Was our survivor untouched by this instruction? + if (Candidates.test(Survivor)) continue; - DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(UDMI); - if (DI == DistanceMap.end()) { - // If it's not in map, it's below current MI, let's initialize the - // map. - I = next(I); - unsigned Dist = CurrDist + 1; - while (I != MBB->end()) { - DistanceMap.insert(std::make_pair(I, Dist++)); - I = next(I); - } - } - DI = DistanceMap.find(UDMI); - if (DI->second > CurrDist && DI->second < Dist) { - Dist = DI->second; - UseMI = UDMI; - } + + // All candidates gone? + if (Candidates.none()) + break; + + Survivor = Candidates.find_first(); } - return UseMI; + + // We ran out of candidates, so stop the search. + UseMI = MI; + return Survivor; } unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC, MachineBasicBlock::iterator I, int SPAdj) { - assert(ScavengingFrameIndex >= 0 && - "Cannot scavenge a register without an emergency spill slot!"); - // Mask off the registers which are not in the TargetRegisterClass. BitVector Candidates(NumPhysRegs, false); CreateRegClassMask(RC, Candidates); - Candidates ^= ReservedRegs; // Do not include reserved registers. 
+ // Do not include reserved registers. + Candidates ^= ReservedRegs & Candidates; // Exclude all the registers being used by the instruction. for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { MachineOperand &MO = I->getOperand(i); - if (MO.isReg()) + if (MO.isReg() && MO.getReg() != 0 && + !TargetRegisterInfo::isVirtualRegister(MO.getReg())) Candidates.reset(MO.getReg()); } // Find the register whose use is furthest away. - unsigned SReg = 0; - unsigned MaxDist = 0; - MachineInstr *MaxUseMI = 0; - int Reg = Candidates.find_first(); - while (Reg != -1) { - unsigned Dist; - MachineInstr *UseMI = findFirstUse(MBB, I, Reg, Dist); - for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) { - unsigned AsDist; - MachineInstr *AsUseMI = findFirstUse(MBB, I, *AS, AsDist); - if (AsDist < Dist) { - Dist = AsDist; - UseMI = AsUseMI; - } - } - if (Dist >= MaxDist) { - MaxDist = Dist; - MaxUseMI = UseMI; - SReg = Reg; - } - Reg = Candidates.find_next(Reg); - } + MachineBasicBlock::iterator UseMI; + unsigned SReg = findSurvivorReg(I, Candidates, 25, UseMI); - if (ScavengedReg != 0) { - assert(0 && "Scavenger slot is live, unable to scavenge another register!"); - abort(); - } + // If we found an unused register there is no reason to spill it. We have + // probably found a callee-saved register that has been saved in the + // prologue, but happens to be unused at this point. + if (!isAliasUsed(SReg)) + return SReg; - // Spill the scavenged register before I. - TII->storeRegToStackSlot(*MBB, I, SReg, true, ScavengingFrameIndex, RC); - MachineBasicBlock::iterator II = prior(I); - TRI->eliminateFrameIndex(II, SPAdj, this); + assert(ScavengedReg == 0 && + "Scavenger slot is live, unable to scavenge another register!"); - // Restore the scavenged register before its use (or first terminator). - II = MaxUseMI - ? 
MachineBasicBlock::iterator(MaxUseMI) : MBB->getFirstTerminator(); - TII->loadRegFromStackSlot(*MBB, II, SReg, ScavengingFrameIndex, RC); - ScavengeRestore = prior(II); + // Avoid infinite regress ScavengedReg = SReg; + + // If the target knows how to save/restore the register, let it do so; + // otherwise, use the emergency stack spill slot. + if (!TRI->saveScavengerRegister(*MBB, I, RC, SReg)) { + // Spill the scavenged register before I. + assert(ScavengingFrameIndex >= 0 && + "Cannot scavenge register without an emergency spill slot!"); + TII->storeRegToStackSlot(*MBB, I, SReg, true, ScavengingFrameIndex, RC); + MachineBasicBlock::iterator II = prior(I); + TRI->eliminateFrameIndex(II, SPAdj, NULL, this); + + // Restore the scavenged register before its use (or first terminator). + TII->loadRegFromStackSlot(*MBB, UseMI, SReg, ScavengingFrameIndex, RC); + } else + TRI->restoreScavengerRegister(*MBB, UseMI, RC, SReg); + + ScavengeRestore = prior(UseMI); + + // Doing this here leads to infinite regress. 
+ // ScavengedReg = SReg; ScavengedRC = RC; return SReg; diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp index a8452df..5a59862 100644 --- a/lib/CodeGen/ScheduleDAG.cpp +++ b/lib/CodeGen/ScheduleDAG.cpp @@ -19,6 +19,7 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include <climits> using namespace llvm; @@ -40,7 +41,7 @@ void ScheduleDAG::dumpSchedule() const { if (SUnit *SU = Sequence[i]) SU->dump(this); else - cerr << "**** NOOP ****\n"; + errs() << "**** NOOP ****\n"; } } @@ -59,9 +60,11 @@ void ScheduleDAG::Run(MachineBasicBlock *bb, Schedule(); - DOUT << "*** Final schedule ***\n"; - DEBUG(dumpSchedule()); - DOUT << "\n"; + DEBUG({ + errs() << "*** Final schedule ***\n"; + dumpSchedule(); + errs() << '\n'; + }); } /// addPred - This adds the specified edge as a pred of the current node if @@ -79,13 +82,19 @@ void SUnit::addPred(const SDep &D) { SUnit *N = D.getSUnit(); // Update the bookkeeping. if (D.getKind() == SDep::Data) { + assert(NumPreds < UINT_MAX && "NumPreds will overflow!"); + assert(N->NumSuccs < UINT_MAX && "NumSuccs will overflow!"); ++NumPreds; ++N->NumSuccs; } - if (!N->isScheduled) + if (!N->isScheduled) { + assert(NumPredsLeft < UINT_MAX && "NumPredsLeft will overflow!"); ++NumPredsLeft; - if (!isScheduled) + } + if (!isScheduled) { + assert(N->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!"); ++N->NumSuccsLeft; + } Preds.push_back(D); N->Succs.push_back(P); if (P.getLatency() != 0) { @@ -118,13 +127,19 @@ void SUnit::removePred(const SDep &D) { Preds.erase(I); // Update the bookkeeping. 
if (P.getKind() == SDep::Data) { + assert(NumPreds > 0 && "NumPreds will underflow!"); + assert(N->NumSuccs > 0 && "NumSuccs will underflow!"); --NumPreds; --N->NumSuccs; } - if (!N->isScheduled) + if (!N->isScheduled) { + assert(NumPredsLeft > 0 && "NumPredsLeft will underflow!"); --NumPredsLeft; - if (!isScheduled) + } + if (!isScheduled) { + assert(N->NumSuccsLeft > 0 && "NumSuccsLeft will underflow!"); --N->NumSuccsLeft; + } if (P.getLatency() != 0) { this->setDepthDirty(); N->setHeightDirty(); @@ -256,56 +271,58 @@ void SUnit::ComputeHeight() { /// SUnit - Scheduling unit. It's an wrapper around either a single SDNode or /// a group of nodes flagged together. void SUnit::dump(const ScheduleDAG *G) const { - cerr << "SU(" << NodeNum << "): "; + errs() << "SU(" << NodeNum << "): "; G->dumpNode(this); } void SUnit::dumpAll(const ScheduleDAG *G) const { dump(G); - cerr << " # preds left : " << NumPredsLeft << "\n"; - cerr << " # succs left : " << NumSuccsLeft << "\n"; - cerr << " Latency : " << Latency << "\n"; - cerr << " Depth : " << Depth << "\n"; - cerr << " Height : " << Height << "\n"; + errs() << " # preds left : " << NumPredsLeft << "\n"; + errs() << " # succs left : " << NumSuccsLeft << "\n"; + errs() << " Latency : " << Latency << "\n"; + errs() << " Depth : " << Depth << "\n"; + errs() << " Height : " << Height << "\n"; if (Preds.size() != 0) { - cerr << " Predecessors:\n"; + errs() << " Predecessors:\n"; for (SUnit::const_succ_iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) { - cerr << " "; + errs() << " "; switch (I->getKind()) { - case SDep::Data: cerr << "val "; break; - case SDep::Anti: cerr << "anti"; break; - case SDep::Output: cerr << "out "; break; - case SDep::Order: cerr << "ch "; break; + case SDep::Data: errs() << "val "; break; + case SDep::Anti: errs() << "anti"; break; + case SDep::Output: errs() << "out "; break; + case SDep::Order: errs() << "ch "; break; } - cerr << "#"; - cerr << I->getSUnit() << " - SU(" << 
I->getSUnit()->NodeNum << ")"; + errs() << "#"; + errs() << I->getSUnit() << " - SU(" << I->getSUnit()->NodeNum << ")"; if (I->isArtificial()) - cerr << " *"; - cerr << "\n"; + errs() << " *"; + errs() << ": Latency=" << I->getLatency(); + errs() << "\n"; } } if (Succs.size() != 0) { - cerr << " Successors:\n"; + errs() << " Successors:\n"; for (SUnit::const_succ_iterator I = Succs.begin(), E = Succs.end(); I != E; ++I) { - cerr << " "; + errs() << " "; switch (I->getKind()) { - case SDep::Data: cerr << "val "; break; - case SDep::Anti: cerr << "anti"; break; - case SDep::Output: cerr << "out "; break; - case SDep::Order: cerr << "ch "; break; + case SDep::Data: errs() << "val "; break; + case SDep::Anti: errs() << "anti"; break; + case SDep::Output: errs() << "out "; break; + case SDep::Order: errs() << "ch "; break; } - cerr << "#"; - cerr << I->getSUnit() << " - SU(" << I->getSUnit()->NodeNum << ")"; + errs() << "#"; + errs() << I->getSUnit() << " - SU(" << I->getSUnit()->NodeNum << ")"; if (I->isArtificial()) - cerr << " *"; - cerr << "\n"; + errs() << " *"; + errs() << ": Latency=" << I->getLatency(); + errs() << "\n"; } } - cerr << "\n"; + errs() << "\n"; } #ifndef NDEBUG @@ -323,35 +340,35 @@ void ScheduleDAG::VerifySchedule(bool isBottomUp) { continue; } if (!AnyNotSched) - cerr << "*** Scheduling failed! ***\n"; + errs() << "*** Scheduling failed! ***\n"; SUnits[i].dump(this); - cerr << "has not been scheduled!\n"; + errs() << "has not been scheduled!\n"; AnyNotSched = true; } if (SUnits[i].isScheduled && (isBottomUp ? SUnits[i].getHeight() : SUnits[i].getHeight()) > unsigned(INT_MAX)) { if (!AnyNotSched) - cerr << "*** Scheduling failed! ***\n"; + errs() << "*** Scheduling failed! ***\n"; SUnits[i].dump(this); - cerr << "has an unexpected " + errs() << "has an unexpected " << (isBottomUp ? "Height" : "Depth") << " value!\n"; AnyNotSched = true; } if (isBottomUp) { if (SUnits[i].NumSuccsLeft != 0) { if (!AnyNotSched) - cerr << "*** Scheduling failed! 
***\n"; + errs() << "*** Scheduling failed! ***\n"; SUnits[i].dump(this); - cerr << "has successors left!\n"; + errs() << "has successors left!\n"; AnyNotSched = true; } } else { if (SUnits[i].NumPredsLeft != 0) { if (!AnyNotSched) - cerr << "*** Scheduling failed! ***\n"; + errs() << "*** Scheduling failed! ***\n"; SUnits[i].dump(this); - cerr << "has predecessors left!\n"; + errs() << "has predecessors left!\n"; AnyNotSched = true; } } diff --git a/lib/CodeGen/ScheduleDAGEmit.cpp b/lib/CodeGen/ScheduleDAGEmit.cpp index 770f5bb..0d15c02 100644 --- a/lib/CodeGen/ScheduleDAGEmit.cpp +++ b/lib/CodeGen/ScheduleDAGEmit.cpp @@ -28,10 +28,6 @@ #include "llvm/Support/MathExtras.h" using namespace llvm; -void ScheduleDAG::AddMemOperand(MachineInstr *MI, const MachineMemOperand &MO) { - MI->addMemOperand(MF, MO); -} - void ScheduleDAG::EmitNoop() { TII->insertNoop(*BB, InsertPos); } diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index 8e18b3d..44e9296 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -14,8 +14,10 @@ #define DEBUG_TYPE "sched-instrs" #include "ScheduleDAGInstrs.h" +#include "llvm/Operator.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/Target/TargetMachine.h" @@ -45,35 +47,24 @@ void ScheduleDAGInstrs::Run(MachineBasicBlock *bb, ScheduleDAG::Run(bb, end); } -/// getOpcode - If this is an Instruction or a ConstantExpr, return the -/// opcode value. Otherwise return UserOp1. -static unsigned getOpcode(const Value *V) { - if (const Instruction *I = dyn_cast<Instruction>(V)) - return I->getOpcode(); - if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) - return CE->getOpcode(); - // Use UserOp1 to mean there's no opcode. 
- return Instruction::UserOp1; -} - /// getUnderlyingObjectFromInt - This is the function that does the work of /// looking through basic ptrtoint+arithmetic+inttoptr sequences. static const Value *getUnderlyingObjectFromInt(const Value *V) { do { - if (const User *U = dyn_cast<User>(V)) { + if (const Operator *U = dyn_cast<Operator>(V)) { // If we find a ptrtoint, we can transfer control back to the // regular getUnderlyingObjectFromInt. - if (getOpcode(U) == Instruction::PtrToInt) + if (U->getOpcode() == Instruction::PtrToInt) return U->getOperand(0); // If we find an add of a constant or a multiplied value, it's // likely that the other operand will lead us to the base // object. We don't have to worry about the case where the - // object address is somehow being computed bt the multiply, + // object address is somehow being computed by the multiply, // because our callers only care when the result is an // identifibale object. - if (getOpcode(U) != Instruction::Add || + if (U->getOpcode() != Instruction::Add || (!isa<ConstantInt>(U->getOperand(1)) && - getOpcode(U->getOperand(1)) != Instruction::Mul)) + Operator::getOpcode(U->getOperand(1)) != Instruction::Mul)) return V; V = U->getOperand(0); } else { @@ -90,7 +81,7 @@ static const Value *getUnderlyingObject(const Value *V) { do { V = V->getUnderlyingObject(); // If it found an inttoptr, use special code to continue climing. - if (getOpcode(V) != Instruction::IntToPtr) + if (Operator::getOpcode(V) != Instruction::IntToPtr) break; const Value *O = getUnderlyingObjectFromInt(cast<User>(V)->getOperand(0)); // If that succeeded in finding a pointer, continue the search. @@ -106,11 +97,11 @@ static const Value *getUnderlyingObject(const Value *V) { /// object, return the Value for that object. Otherwise return null. 
static const Value *getUnderlyingObjectForInstr(const MachineInstr *MI) { if (!MI->hasOneMemOperand() || - !MI->memoperands_begin()->getValue() || - MI->memoperands_begin()->isVolatile()) + !(*MI->memoperands_begin())->getValue() || + (*MI->memoperands_begin())->isVolatile()) return 0; - const Value *V = MI->memoperands_begin()->getValue(); + const Value *V = (*MI->memoperands_begin())->getValue(); if (!V) return 0; @@ -132,7 +123,7 @@ void ScheduleDAGInstrs::StartBlock(MachineBasicBlock *BB) { } } -void ScheduleDAGInstrs::BuildSchedGraph() { +void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { // We'll be allocating one SUnit for each instruction, plus one for // the region exit node. SUnits.reserve(BB->size()); @@ -155,8 +146,8 @@ void ScheduleDAGInstrs::BuildSchedGraph() { bool UnitLatencies = ForceUnitLatencies(); // Ask the target if address-backscheduling is desirable, and if so how much. - unsigned SpecialAddressLatency = - TM.getSubtarget<TargetSubtarget>().getSpecialAddressLatency(); + const TargetSubtarget &ST = TM.getSubtarget<TargetSubtarget>(); + unsigned SpecialAddressLatency = ST.getSpecialAddressLatency(); // Walk the list of instructions, from bottom moving up. for (MachineBasicBlock::iterator MII = InsertPos, MIE = Begin; @@ -184,16 +175,20 @@ void ScheduleDAGInstrs::BuildSchedGraph() { assert(TRI->isPhysicalRegister(Reg) && "Virtual register encountered!"); std::vector<SUnit *> &UseList = Uses[Reg]; std::vector<SUnit *> &DefList = Defs[Reg]; - // Optionally add output and anti dependencies. - // TODO: Using a latency of 1 here assumes there's no cost for - // reusing registers. + // Optionally add output and anti dependencies. For anti + // dependencies we use a latency of 0 because for a multi-issue + // target we want to allow the defining instruction to issue + // in the same cycle as the using instruction. + // TODO: Using a latency of 1 here for output dependencies assumes + // there's no cost for reusing registers. 
SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output; + unsigned AOLatency = (Kind == SDep::Anti) ? 0 : 1; for (unsigned i = 0, e = DefList.size(); i != e; ++i) { SUnit *DefSU = DefList[i]; if (DefSU != SU && (Kind != SDep::Output || !MO.isDead() || !DefSU->getInstr()->registerDefIsDead(Reg))) - DefSU->addPred(SDep(SU, Kind, /*Latency=*/1, /*Reg=*/Reg)); + DefSU->addPred(SDep(SU, Kind, AOLatency, /*Reg=*/Reg)); } for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { std::vector<SUnit *> &DefList = Defs[*Alias]; @@ -202,7 +197,7 @@ void ScheduleDAGInstrs::BuildSchedGraph() { if (DefSU != SU && (Kind != SDep::Output || !MO.isDead() || !DefSU->getInstr()->registerDefIsDead(Reg))) - DefSU->addPred(SDep(SU, Kind, /*Latency=*/1, /*Reg=*/ *Alias)); + DefSU->addPred(SDep(SU, Kind, AOLatency, /*Reg=*/ *Alias)); } } @@ -216,6 +211,10 @@ void ScheduleDAGInstrs::BuildSchedGraph() { // Optionally add in a special extra latency for nodes that // feed addresses. // TODO: Do this for register aliases too. + // TODO: Perhaps we should get rid of + // SpecialAddressLatency and just move this into + // adjustSchedDependency for the targets that care about + // it. if (SpecialAddressLatency != 0 && !UnitLatencies) { MachineInstr *UseMI = UseSU->getInstr(); const TargetInstrDesc &UseTID = UseMI->getDesc(); @@ -226,15 +225,29 @@ void ScheduleDAGInstrs::BuildSchedGraph() { UseTID.OpInfo[RegUseIndex].isLookupPtrRegClass()) LDataLatency += SpecialAddressLatency; } - UseSU->addPred(SDep(SU, SDep::Data, LDataLatency, Reg)); + // Adjust the dependence latency using operand def/use + // information (if any), and then allow the target to + // perform its own adjustments. 
+ const SDep& dep = SDep(SU, SDep::Data, LDataLatency, Reg); + if (!UnitLatencies) { + ComputeOperandLatency(SU, UseSU, (SDep &)dep); + ST.adjustSchedDependency(SU, UseSU, (SDep &)dep); + } + UseSU->addPred(dep); } } for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { std::vector<SUnit *> &UseList = Uses[*Alias]; for (unsigned i = 0, e = UseList.size(); i != e; ++i) { SUnit *UseSU = UseList[i]; - if (UseSU != SU) - UseSU->addPred(SDep(SU, SDep::Data, DataLatency, *Alias)); + if (UseSU != SU) { + const SDep& dep = SDep(SU, SDep::Data, DataLatency, *Alias); + if (!UnitLatencies) { + ComputeOperandLatency(SU, UseSU, (SDep &)dep); + ST.adjustSchedDependency(SU, UseSU, (SDep &)dep); + } + UseSU->addPred(dep); + } } } @@ -323,10 +336,10 @@ void ScheduleDAGInstrs::BuildSchedGraph() { if (!ChainTID.isCall() && !ChainTID.hasUnmodeledSideEffects() && ChainMI->hasOneMemOperand() && - !ChainMI->memoperands_begin()->isVolatile() && - ChainMI->memoperands_begin()->getValue()) + !(*ChainMI->memoperands_begin())->isVolatile() && + (*ChainMI->memoperands_begin())->getValue()) // We know that the Chain accesses one specific memory location. - ChainMMO = &*ChainMI->memoperands_begin(); + ChainMMO = *ChainMI->memoperands_begin(); else // Unknown memory accesses. Assume the worst. ChainMMO = 0; @@ -362,7 +375,7 @@ void ScheduleDAGInstrs::BuildSchedGraph() { // Treat all other stores conservatively. goto new_chain; } else if (TID.mayLoad()) { - if (TII->isInvariantLoad(MI)) { + if (MI->isInvariantLoad(AA)) { // Invariant load, no chain dependencies needed! } else if (const Value *V = getUnderlyingObjectForInstr(MI)) { // A load from a specific PseudoSourceValue. Add precise dependencies. @@ -409,10 +422,9 @@ void ScheduleDAGInstrs::FinishBlock() { void ScheduleDAGInstrs::ComputeLatency(SUnit *SU) { const InstrItineraryData &InstrItins = TM.getInstrItineraryData(); - // Compute the latency for the node. 
We use the sum of the latencies for - // all nodes flagged together into this SUnit. + // Compute the latency for the node. SU->Latency = - InstrItins.getLatency(SU->getInstr()->getDesc().getSchedClass()); + InstrItins.getStageLatency(SU->getInstr()->getDesc().getSchedClass()); // Simplistic target-independent heuristic: assume that loads take // extra time. @@ -421,6 +433,50 @@ void ScheduleDAGInstrs::ComputeLatency(SUnit *SU) { SU->Latency += 2; } +void ScheduleDAGInstrs::ComputeOperandLatency(SUnit *Def, SUnit *Use, + SDep& dep) const { + const InstrItineraryData &InstrItins = TM.getInstrItineraryData(); + if (InstrItins.isEmpty()) + return; + + // For a data dependency with a known register... + if ((dep.getKind() != SDep::Data) || (dep.getReg() == 0)) + return; + + const unsigned Reg = dep.getReg(); + + // ... find the definition of the register in the defining + // instruction + MachineInstr *DefMI = Def->getInstr(); + int DefIdx = DefMI->findRegisterDefOperandIdx(Reg); + if (DefIdx != -1) { + int DefCycle = InstrItins.getOperandCycle(DefMI->getDesc().getSchedClass(), DefIdx); + if (DefCycle >= 0) { + MachineInstr *UseMI = Use->getInstr(); + const unsigned UseClass = UseMI->getDesc().getSchedClass(); + + // For all uses of the register, calculate the maxmimum latency + int Latency = -1; + for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = UseMI->getOperand(i); + if (!MO.isReg() || !MO.isUse()) + continue; + unsigned MOReg = MO.getReg(); + if (MOReg != Reg) + continue; + + int UseCycle = InstrItins.getOperandCycle(UseClass, i); + if (UseCycle >= 0) + Latency = std::max(Latency, DefCycle - UseCycle + 1); + } + + // If we found a latency, then replace the existing dependence latency. 
+ if (Latency >= 0) + dep.setLatency(Latency); + } + } +} + void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const { SU->getInstr()->dump(); } @@ -438,7 +494,8 @@ std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const { } // EmitSchedule - Emit the machine code in scheduled order. -MachineBasicBlock *ScheduleDAGInstrs::EmitSchedule() { +MachineBasicBlock *ScheduleDAGInstrs:: +EmitSchedule(DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) { // For MachineInstr-based scheduling, we're rescheduling the instructions in // the block, so start by removing them from the block. while (Begin != InsertPos) { diff --git a/lib/CodeGen/ScheduleDAGInstrs.h b/lib/CodeGen/ScheduleDAGInstrs.h index 00d6268..29e1c98 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.h +++ b/lib/CodeGen/ScheduleDAGInstrs.h @@ -15,12 +15,13 @@ #ifndef SCHEDULEDAGINSTRS_H #define SCHEDULEDAGINSTRS_H -#include "llvm/ADT/SmallSet.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/Support/Compiler.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallSet.h" #include <map> namespace llvm { @@ -120,7 +121,6 @@ namespace llvm { SmallSet<unsigned, 8> LoopLiveInRegs; public: - MachineBasicBlock *BB; // Current basic block MachineBasicBlock::iterator Begin; // The beginning of the range to // be scheduled. The range extends // to InsertPos. @@ -154,13 +154,20 @@ namespace llvm { /// BuildSchedGraph - Build SUnits from the MachineBasicBlock that we are /// input. - virtual void BuildSchedGraph(); + virtual void BuildSchedGraph(AliasAnalysis *AA); /// ComputeLatency - Compute node latency. 
/// virtual void ComputeLatency(SUnit *SU); - virtual MachineBasicBlock *EmitSchedule(); + /// ComputeOperandLatency - Override dependence edge latency using + /// operand use/def information + /// + virtual void ComputeOperandLatency(SUnit *Def, SUnit *Use, + SDep& dep) const; + + virtual MachineBasicBlock* + EmitSchedule(DenseMap<MachineBasicBlock*, MachineBasicBlock*>*); /// StartBlock - Prepare to perform scheduling in the given block. /// diff --git a/lib/CodeGen/ScheduleDAGPrinter.cpp b/lib/CodeGen/ScheduleDAGPrinter.cpp index 5efd274..95ad05e 100644 --- a/lib/CodeGen/ScheduleDAGPrinter.cpp +++ b/lib/CodeGen/ScheduleDAGPrinter.cpp @@ -86,14 +86,14 @@ void ScheduleDAG::viewGraph() { // This code is only for debugging! #ifndef NDEBUG if (BB->getBasicBlock()) - ViewGraph(this, "dag." + MF.getFunction()->getName(), false, - "Scheduling-Units Graph for " + MF.getFunction()->getName() + ':' + - BB->getBasicBlock()->getName()); + ViewGraph(this, "dag." + MF.getFunction()->getNameStr(), false, + "Scheduling-Units Graph for " + MF.getFunction()->getNameStr() + + ":" + BB->getBasicBlock()->getNameStr()); else - ViewGraph(this, "dag." + MF.getFunction()->getName(), false, - "Scheduling-Units Graph for " + MF.getFunction()->getName()); + ViewGraph(this, "dag." 
+ MF.getFunction()->getNameStr(), false, + "Scheduling-Units Graph for " + MF.getFunction()->getNameStr()); #else - cerr << "ScheduleDAG::viewGraph is only available in debug builds on " - << "systems with Graphviz or gv!\n"; + errs() << "ScheduleDAG::viewGraph is only available in debug builds on " + << "systems with Graphviz or gv!\n"; #endif // NDEBUG } diff --git a/lib/CodeGen/SelectionDAG/CMakeLists.txt b/lib/CodeGen/SelectionDAG/CMakeLists.txt index 4ffe88f..c766859 100644 --- a/lib/CodeGen/SelectionDAG/CMakeLists.txt +++ b/lib/CodeGen/SelectionDAG/CMakeLists.txt @@ -2,6 +2,7 @@ add_llvm_library(LLVMSelectionDAG CallingConvLower.cpp DAGCombiner.cpp FastISel.cpp + InstrEmitter.cpp LegalizeDAG.cpp LegalizeFloatTypes.cpp LegalizeIntegerTypes.cpp @@ -9,13 +10,12 @@ add_llvm_library(LLVMSelectionDAG LegalizeTypesGeneric.cpp LegalizeVectorOps.cpp LegalizeVectorTypes.cpp - ScheduleDAGSDNodes.cpp - ScheduleDAGSDNodesEmit.cpp ScheduleDAGFast.cpp ScheduleDAGList.cpp ScheduleDAGRRList.cpp - SelectionDAGBuild.cpp + ScheduleDAGSDNodes.cpp SelectionDAG.cpp + SelectionDAGBuild.cpp SelectionDAGISel.cpp SelectionDAGPrinter.cpp TargetLowering.cpp diff --git a/lib/CodeGen/SelectionDAG/CallingConvLower.cpp b/lib/CodeGen/SelectionDAG/CallingConvLower.cpp index 7cd2b73..fbe40b6 100644 --- a/lib/CodeGen/SelectionDAG/CallingConvLower.cpp +++ b/lib/CodeGen/SelectionDAG/CallingConvLower.cpp @@ -13,15 +13,17 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetMachine.h" using namespace llvm; -CCState::CCState(unsigned CC, bool isVarArg, const TargetMachine &tm, - SmallVector<CCValAssign, 16> &locs) +CCState::CCState(CallingConv::ID CC, bool isVarArg, const TargetMachine &tm, + SmallVector<CCValAssign, 16> &locs, LLVMContext &C) 
: CallingConv(CC), IsVarArg(isVarArg), TM(tm), - TRI(*TM.getRegisterInfo()), Locs(locs) { + TRI(*TM.getRegisterInfo()), Locs(locs), Context(C) { // No stack is used. StackOffset = 0; @@ -31,8 +33,8 @@ CCState::CCState(unsigned CC, bool isVarArg, const TargetMachine &tm, // HandleByVal - Allocate a stack slot large enough to pass an argument by // value. The size and alignment information of the argument is encoded in its // parameter attribute. -void CCState::HandleByVal(unsigned ValNo, MVT ValVT, - MVT LocVT, CCValAssign::LocInfo LocInfo, +void CCState::HandleByVal(unsigned ValNo, EVT ValVT, + EVT LocVT, CCValAssign::LocInfo LocInfo, int MinSize, int MinAlign, ISD::ArgFlagsTy ArgFlags) { unsigned Align = ArgFlags.getByValAlign(); @@ -55,94 +57,107 @@ void CCState::MarkAllocated(unsigned Reg) { UsedRegs[Reg/32] |= 1 << (Reg&31); } -/// AnalyzeFormalArguments - Analyze an ISD::FORMAL_ARGUMENTS node, +/// AnalyzeFormalArguments - Analyze an array of argument values, /// incorporating info about the formals into this state. 
-void CCState::AnalyzeFormalArguments(SDNode *TheArgs, CCAssignFn Fn) { - unsigned NumArgs = TheArgs->getNumValues()-1; - +void +CCState::AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins, + CCAssignFn Fn) { + unsigned NumArgs = Ins.size(); + for (unsigned i = 0; i != NumArgs; ++i) { - MVT ArgVT = TheArgs->getValueType(i); - ISD::ArgFlagsTy ArgFlags = - cast<ARG_FLAGSSDNode>(TheArgs->getOperand(3+i))->getArgFlags(); + EVT ArgVT = Ins[i].VT; + ISD::ArgFlagsTy ArgFlags = Ins[i].Flags; if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) { - cerr << "Formal argument #" << i << " has unhandled type " - << ArgVT.getMVTString() << "\n"; - abort(); +#ifndef NDEBUG + errs() << "Formal argument #" << i << " has unhandled type " + << ArgVT.getEVTString(); +#endif + llvm_unreachable(0); } } } -/// AnalyzeReturn - Analyze the returned values of an ISD::RET node, +/// AnalyzeReturn - Analyze the returned values of a return, /// incorporating info about the result values into this state. -void CCState::AnalyzeReturn(SDNode *TheRet, CCAssignFn Fn) { +void CCState::AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs, + CCAssignFn Fn) { // Determine which register each value should be copied into. 
- for (unsigned i = 0, e = TheRet->getNumOperands() / 2; i != e; ++i) { - MVT VT = TheRet->getOperand(i*2+1).getValueType(); - ISD::ArgFlagsTy ArgFlags = - cast<ARG_FLAGSSDNode>(TheRet->getOperand(i*2+2))->getArgFlags(); - if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this)){ - cerr << "Return operand #" << i << " has unhandled type " - << VT.getMVTString() << "\n"; - abort(); + for (unsigned i = 0, e = Outs.size(); i != e; ++i) { + EVT VT = Outs[i].Val.getValueType(); + ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; + if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this)) { +#ifndef NDEBUG + errs() << "Return operand #" << i << " has unhandled type " + << VT.getEVTString(); +#endif + llvm_unreachable(0); } } } -/// AnalyzeCallOperands - Analyze an ISD::CALL node, incorporating info -/// about the passed values into this state. -void CCState::AnalyzeCallOperands(CallSDNode *TheCall, CCAssignFn Fn) { - unsigned NumOps = TheCall->getNumArgs(); +/// AnalyzeCallOperands - Analyze the outgoing arguments to a call, +/// incorporating info about the passed values into this state. +void CCState::AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs, + CCAssignFn Fn) { + unsigned NumOps = Outs.size(); for (unsigned i = 0; i != NumOps; ++i) { - MVT ArgVT = TheCall->getArg(i).getValueType(); - ISD::ArgFlagsTy ArgFlags = TheCall->getArgFlags(i); + EVT ArgVT = Outs[i].Val.getValueType(); + ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) { - cerr << "Call operand #" << i << " has unhandled type " - << ArgVT.getMVTString() << "\n"; - abort(); +#ifndef NDEBUG + errs() << "Call operand #" << i << " has unhandled type " + << ArgVT.getEVTString(); +#endif + llvm_unreachable(0); } } } /// AnalyzeCallOperands - Same as above except it takes vectors of types /// and argument flags. 
-void CCState::AnalyzeCallOperands(SmallVectorImpl<MVT> &ArgVTs, +void CCState::AnalyzeCallOperands(SmallVectorImpl<EVT> &ArgVTs, SmallVectorImpl<ISD::ArgFlagsTy> &Flags, CCAssignFn Fn) { unsigned NumOps = ArgVTs.size(); for (unsigned i = 0; i != NumOps; ++i) { - MVT ArgVT = ArgVTs[i]; + EVT ArgVT = ArgVTs[i]; ISD::ArgFlagsTy ArgFlags = Flags[i]; if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) { - cerr << "Call operand #" << i << " has unhandled type " - << ArgVT.getMVTString() << "\n"; - abort(); +#ifndef NDEBUG + errs() << "Call operand #" << i << " has unhandled type " + << ArgVT.getEVTString(); +#endif + llvm_unreachable(0); } } } -/// AnalyzeCallResult - Analyze the return values of an ISD::CALL node, +/// AnalyzeCallResult - Analyze the return values of a call, /// incorporating info about the passed values into this state. -void CCState::AnalyzeCallResult(CallSDNode *TheCall, CCAssignFn Fn) { - for (unsigned i = 0, e = TheCall->getNumRetVals(); i != e; ++i) { - MVT VT = TheCall->getRetValType(i); - ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); - if (TheCall->isInreg()) - Flags.setInReg(); +void CCState::AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins, + CCAssignFn Fn) { + for (unsigned i = 0, e = Ins.size(); i != e; ++i) { + EVT VT = Ins[i].VT; + ISD::ArgFlagsTy Flags = Ins[i].Flags; if (Fn(i, VT, VT, CCValAssign::Full, Flags, *this)) { - cerr << "Call result #" << i << " has unhandled type " - << VT.getMVTString() << "\n"; - abort(); +#ifndef NDEBUG + errs() << "Call result #" << i << " has unhandled type " + << VT.getEVTString(); +#endif + llvm_unreachable(0); } } } /// AnalyzeCallResult - Same as above except it's specialized for calls which /// produce a single value. 
-void CCState::AnalyzeCallResult(MVT VT, CCAssignFn Fn) { +void CCState::AnalyzeCallResult(EVT VT, CCAssignFn Fn) { if (Fn(0, VT, VT, CCValAssign::Full, ISD::ArgFlagsTy(), *this)) { - cerr << "Call result has unhandled type " - << VT.getMVTString() << "\n"; - abort(); +#ifndef NDEBUG + errs() << "Call result has unhandled type " + << VT.getEVTString(); +#endif + llvm_unreachable(0); } } diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 609ec82..1ed3082 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -19,6 +19,7 @@ #define DEBUG_TYPE "dagcombine" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/DerivedTypes.h" +#include "llvm/LLVMContext.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" @@ -33,7 +34,9 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" #include <algorithm> #include <set> using namespace llvm; @@ -213,12 +216,12 @@ namespace { SDValue SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, bool NotExtCompare = false); - SDValue SimplifySetCC(MVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, + SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, DebugLoc DL, bool foldBooleans = true); SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, unsigned HiOp); - SDValue CombineConsecutiveLoads(SDNode *N, MVT VT); - SDValue ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *, MVT); + SDValue CombineConsecutiveLoads(SDNode *N, EVT VT); + SDValue ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *, EVT); SDValue BuildSDIV(SDNode *N); SDValue BuildUDIV(SDNode *N); SDNode *MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL); @@ -236,14 +239,17 
@@ namespace { /// overlap. bool isAlias(SDValue Ptr1, int64_t Size1, const Value *SrcValue1, int SrcValueOffset1, + unsigned SrcValueAlign1, SDValue Ptr2, int64_t Size2, - const Value *SrcValue2, int SrcValueOffset2) const; + const Value *SrcValue2, int SrcValueOffset2, + unsigned SrcValueAlign2) const; /// FindAliasInfo - Extracts the relevant alias information from the memory /// node. Returns true if the operand was a load. bool FindAliasInfo(SDNode *N, SDValue &Ptr, int64_t &Size, - const Value *&SrcValue, int &SrcValueOffset) const; + const Value *&SrcValue, int &SrcValueOffset, + unsigned &SrcValueAlignment) const; /// FindBetterChain - Walk up chain skipping non-aliasing memory nodes, /// looking for a better chain (aliasing node.) @@ -251,7 +257,7 @@ namespace { /// getShiftAmountTy - Returns a type large enough to hold any valid /// shift amount - before type legalization these can be huge. - MVT getShiftAmountTy() { + EVT getShiftAmountTy() { return LegalTypes ? TLI.getShiftAmountTy() : TLI.getPointerTy(); } @@ -392,7 +398,7 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree"); switch (Op.getOpcode()) { - default: assert(0 && "Unknown code"); + default: llvm_unreachable("Unknown code"); case ISD::ConstantFP: { APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF(); V.changeSign(); @@ -495,7 +501,7 @@ static bool isOneUseSetCC(SDValue N) { SDValue DAGCombiner::ReassociateOps(unsigned Opc, DebugLoc DL, SDValue N0, SDValue N1) { - MVT VT = N0.getValueType(); + EVT VT = N0.getValueType(); if (N0.getOpcode() == Opc && isa<ConstantSDNode>(N0.getOperand(1))) { if (isa<ConstantSDNode>(N1)) { // reassoc. 
(op (op x, c1), c2) -> (op x, (op c1, c2)) @@ -537,10 +543,12 @@ SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo, bool AddTo) { assert(N->getNumValues() == NumTo && "Broken CombineTo call!"); ++NodesCombined; - DOUT << "\nReplacing.1 "; DEBUG(N->dump(&DAG)); - DOUT << "\nWith: "; DEBUG(To[0].getNode()->dump(&DAG)); - DOUT << " and " << NumTo-1 << " other values\n"; - DEBUG(for (unsigned i = 0, e = NumTo; i != e; ++i) + DEBUG(errs() << "\nReplacing.1 "; + N->dump(&DAG); + errs() << "\nWith: "; + To[0].getNode()->dump(&DAG); + errs() << " and " << NumTo-1 << " other values\n"; + for (unsigned i = 0, e = NumTo; i != e; ++i) assert(N->getValueType(i) == To[i].getValueType() && "Cannot combine value to value of different type!")); WorkListRemover DeadNodes(*this); @@ -612,9 +620,11 @@ bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) { // Replace the old value with the new one. ++NodesCombined; - DOUT << "\nReplacing.2 "; DEBUG(TLO.Old.getNode()->dump(&DAG)); - DOUT << "\nWith: "; DEBUG(TLO.New.getNode()->dump(&DAG)); - DOUT << '\n'; + DEBUG(errs() << "\nReplacing.2 "; + TLO.Old.getNode()->dump(&DAG); + errs() << "\nWith: "; + TLO.New.getNode()->dump(&DAG); + errs() << '\n'); CommitTargetLoweringOpt(TLO); return true; @@ -680,9 +690,11 @@ void DAGCombiner::Run(CombineLevel AtLevel) { RV.getNode()->getOpcode() != ISD::DELETED_NODE && "Node was deleted but visit returned new node!"); - DOUT << "\nReplacing.3 "; DEBUG(N->dump(&DAG)); - DOUT << "\nWith: "; DEBUG(RV.getNode()->dump(&DAG)); - DOUT << '\n'; + DEBUG(errs() << "\nReplacing.3 "; + N->dump(&DAG); + errs() << "\nWith: "; + RV.getNode()->dump(&DAG); + errs() << '\n'); WorkListRemover DeadNodes(*this); if (N->getNumValues() == RV.getNode()->getNumValues()) DAG.ReplaceAllUsesWith(N, RV.getNode(), &DeadNodes); @@ -800,7 +812,7 @@ SDValue DAGCombiner::combine(SDNode *N) { // Expose the DAG combiner to the target combiner impls. 
TargetLowering::DAGCombinerInfo - DagCombineInfo(DAG, Level == Unrestricted, false, this); + DagCombineInfo(DAG, !LegalTypes, !LegalOperations, false, this); RV = TLI.PerformDAGCombine(N, DagCombineInfo); } @@ -877,7 +889,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) { break; case ISD::TokenFactor: - if ((CombinerAA || Op.hasOneUse()) && + if (Op.hasOneUse() && std::find(TFs.begin(), TFs.end(), Op.getNode()) == TFs.end()) { // Queue up for processing. TFs.push_back(Op.getNode()); @@ -898,7 +910,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) { } } } - + SDValue Result; // If we've change things around then replace token factor. @@ -922,9 +934,14 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) { /// MERGE_VALUES can always be eliminated. SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) { WorkListRemover DeadNodes(*this); - for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) - DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i), - &DeadNodes); + // Replacing results may cause a different MERGE_VALUES to suddenly + // be CSE'd with N, and carry its uses with it. Iterate until no + // uses remain, to ensure that the node can be safely deleted. + do { + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i), + &DeadNodes); + } while (!N->use_empty()); removeFromWorkList(N); DAG.DeleteNode(N); return SDValue(N, 0); // Return N so it doesn't get rechecked! 
@@ -933,7 +950,7 @@ SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) { static SDValue combineShlAddConstant(DebugLoc DL, SDValue N0, SDValue N1, SelectionDAG &DAG) { - MVT VT = N0.getValueType(); + EVT VT = N0.getValueType(); SDValue N00 = N0.getOperand(0); SDValue N01 = N0.getOperand(1); ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N01); @@ -957,7 +974,7 @@ SDValue DAGCombiner::visitADD(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); - MVT VT = N0.getValueType(); + EVT VT = N0.getValueType(); // fold vector ops if (VT.isVector()) { @@ -1080,7 +1097,7 @@ SDValue DAGCombiner::visitADDC(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); - MVT VT = N0.getValueType(); + EVT VT = N0.getValueType(); // If the flag result is dead, turn this into an ADD. if (N->hasNUsesOfValue(0, 1)) @@ -1142,7 +1159,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode()); ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); - MVT VT = N0.getValueType(); + EVT VT = N0.getValueType(); // fold vector ops if (VT.isVector()) { @@ -1215,7 +1232,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); - MVT VT = N0.getValueType(); + EVT VT = N0.getValueType(); // fold vector ops if (VT.isVector()) { @@ -1308,7 +1325,7 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode()); ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold vector ops if (VT.isVector()) { @@ -1395,7 +1412,7 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) { 
SDValue N1 = N->getOperand(1); ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode()); ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold vector ops if (VT.isVector()) { @@ -1415,7 +1432,7 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) { if (N1.getOpcode() == ISD::SHL) { if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) { if (SHC->getAPIntValue().isPowerOf2()) { - MVT ADDVT = N1.getOperand(1).getValueType(); + EVT ADDVT = N1.getOperand(1).getValueType(); SDValue Add = DAG.getNode(ISD::ADD, N->getDebugLoc(), ADDVT, N1.getOperand(1), DAG.getConstant(SHC->getAPIntValue() @@ -1447,7 +1464,7 @@ SDValue DAGCombiner::visitSREM(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold (srem c1, c2) -> c1%c2 if (N0C && N1C && !N1C->isNullValue()) @@ -1489,7 +1506,7 @@ SDValue DAGCombiner::visitUREM(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold (urem c1, c2) -> c1%c2 if (N0C && N1C && !N1C->isNullValue()) @@ -1541,7 +1558,7 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold (mulhs x, 0) -> 0 if (N1C && N1C->isNullValue()) @@ -1562,7 +1579,7 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold (mulhu x, 0) -> 0 if (N1C && N1C->isNullValue()) @@ -1665,7 +1682,7 @@ SDValue 
DAGCombiner::visitUDIVREM(SDNode *N) { /// two operands of the same opcode, try to simplify it. SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); - MVT VT = N0.getValueType(); + EVT VT = N0.getValueType(); assert(N0.getOpcode() == N1.getOpcode() && "Bad input!"); // For each of OP in AND/OR/XOR: @@ -1677,7 +1694,9 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { N0.getOpcode() == ISD::SIGN_EXTEND || (N0.getOpcode() == ISD::TRUNCATE && !TLI.isTruncateFree(N0.getOperand(0).getValueType(), VT))) && - N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()) { + N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() && + (!LegalOperations || + TLI.isOperationLegal(N->getOpcode(), N0.getOperand(0).getValueType()))) { SDValue ORNode = DAG.getNode(N->getOpcode(), N0.getDebugLoc(), N0.getOperand(0).getValueType(), N0.getOperand(0), N1.getOperand(0)); @@ -1709,7 +1728,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { SDValue LL, LR, RL, RR, CC0, CC1; ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); - MVT VT = N1.getValueType(); + EVT VT = N1.getValueType(); unsigned BitWidth = VT.getSizeInBits(); // fold vector ops @@ -1820,18 +1839,18 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // fold (zext_inreg (extload x)) -> (zextload x) if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); - MVT EVT = LN0->getMemoryVT(); + EVT MemVT = LN0->getMemoryVT(); // If we zero all the possible extended bits, then we can turn this into // a zextload if we are running before legalize or the operation is legal. 
unsigned BitWidth = N1.getValueSizeInBits(); if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, - BitWidth - EVT.getSizeInBits())) && + BitWidth - MemVT.getSizeInBits())) && ((!LegalOperations && !LN0->isVolatile()) || - TLI.isLoadExtLegal(ISD::ZEXTLOAD, EVT))) { + TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) { SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT, LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), - LN0->getSrcValueOffset(), EVT, + LN0->getSrcValueOffset(), MemVT, LN0->isVolatile(), LN0->getAlignment()); AddToWorkList(N); CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); @@ -1842,18 +1861,18 @@ SDValue DAGCombiner::visitAND(SDNode *N) { if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); - MVT EVT = LN0->getMemoryVT(); + EVT MemVT = LN0->getMemoryVT(); // If we zero all the possible extended bits, then we can turn this into // a zextload if we are running before legalize or the operation is legal. unsigned BitWidth = N1.getValueSizeInBits(); if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, - BitWidth - EVT.getSizeInBits())) && + BitWidth - MemVT.getSizeInBits())) && ((!LegalOperations && !LN0->isVolatile()) || - TLI.isLoadExtLegal(ISD::ZEXTLOAD, EVT))) { + TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) { SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT, LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), - LN0->getSrcValueOffset(), EVT, + LN0->getSrcValueOffset(), MemVT, LN0->isVolatile(), LN0->getAlignment()); AddToWorkList(N); CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); @@ -1869,24 +1888,24 @@ SDValue DAGCombiner::visitAND(SDNode *N) { LN0->isUnindexed() && N0.hasOneUse() && // Do not change the width of a volatile load. 
!LN0->isVolatile()) { - MVT EVT = MVT::Other; + EVT ExtVT = MVT::Other; uint32_t ActiveBits = N1C->getAPIntValue().getActiveBits(); if (ActiveBits > 0 && APIntOps::isMask(ActiveBits, N1C->getAPIntValue())) - EVT = MVT::getIntegerVT(ActiveBits); + ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits); - MVT LoadedVT = LN0->getMemoryVT(); + EVT LoadedVT = LN0->getMemoryVT(); // Do not generate loads of non-round integer types since these can // be expensive (and would be wrong if the type is not byte sized). - if (EVT != MVT::Other && LoadedVT.bitsGT(EVT) && EVT.isRound() && - (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, EVT))) { - MVT PtrType = N0.getOperand(1).getValueType(); + if (ExtVT != MVT::Other && LoadedVT.bitsGT(ExtVT) && ExtVT.isRound() && + (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) { + EVT PtrType = N0.getOperand(1).getValueType(); // For big endian targets, we need to add an offset to the pointer to // load the correct bytes. For little endian systems, we merely need to // read fewer bytes from the same pointer. - unsigned LVTStoreBytes = LoadedVT.getStoreSizeInBits()/8; - unsigned EVTStoreBytes = EVT.getStoreSizeInBits()/8; + unsigned LVTStoreBytes = LoadedVT.getStoreSize(); + unsigned EVTStoreBytes = ExtVT.getStoreSize(); unsigned PtrOff = LVTStoreBytes - EVTStoreBytes; unsigned Alignment = LN0->getAlignment(); SDValue NewPtr = LN0->getBasePtr(); @@ -1901,7 +1920,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { SDValue Load = DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), VT, LN0->getChain(), NewPtr, LN0->getSrcValue(), LN0->getSrcValueOffset(), - EVT, LN0->isVolatile(), Alignment); + ExtVT, LN0->isVolatile(), Alignment); AddToWorkList(N); CombineTo(N0.getNode(), Load, Load.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! 
@@ -1918,7 +1937,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) { SDValue LL, LR, RL, RR, CC0, CC1; ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); - MVT VT = N1.getValueType(); + EVT VT = N1.getValueType(); // fold vector ops if (VT.isVector()) { @@ -1928,7 +1947,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) { // fold (or x, undef) -> -1 if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) - return DAG.getConstant(~0ULL, VT); + return DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT); // fold (or c1, c2) -> c1|c2 if (N0C && N1C) return DAG.FoldConstantArithmetic(ISD::OR, VT, N0C, N1C); @@ -2058,7 +2077,7 @@ static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) { // a rot[lr]. SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL) { // Must be a legal type. Expanded 'n promoted things won't work with rotates. - MVT VT = LHS.getValueType(); + EVT VT = LHS.getValueType(); if (!TLI.isTypeLegal(VT)) return 0; // The target must have at least one rotate flavor. 
@@ -2219,7 +2238,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { SDValue LHS, RHS, CC; ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); - MVT VT = N0.getValueType(); + EVT VT = N0.getValueType(); // fold vector ops if (VT.isVector()) { @@ -2258,8 +2277,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { if (!LegalOperations || TLI.isCondCodeLegal(NotCC, LHS.getValueType())) { switch (N0.getOpcode()) { default: - assert(0 && "Unhandled SetCC Equivalent!"); - abort(); + llvm_unreachable("Unhandled SetCC Equivalent!"); case ISD::SETCC: return DAG.getSetCC(N->getDebugLoc(), VT, LHS, RHS, NotCC); case ISD::SELECT_CC: @@ -2388,7 +2406,7 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N, unsigned Amt) { !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1))) return SDValue(); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // If this is a signed shift right, and the high bit is modified by the // logical operation, do not perform the transformation. 
The highBitSet @@ -2418,7 +2436,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); - MVT VT = N0.getValueType(); + EVT VT = N0.getValueType(); unsigned OpSizeInBits = VT.getSizeInBits(); // fold (shl c1, c2) -> c1<<c2 @@ -2443,7 +2461,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { N1.hasOneUse() && N1.getOperand(0).hasOneUse()) { SDValue N101 = N1.getOperand(0).getOperand(1); if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) { - MVT TruncVT = N1.getValueType(); + EVT TruncVT = N1.getValueType(); SDValue N100 = N1.getOperand(0).getOperand(0); APInt TruncC = N101C->getAPIntValue(); TruncC.trunc(TruncVT.getSizeInBits()); @@ -2474,20 +2492,33 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { if (N1C && N0.getOpcode() == ISD::SRL && N0.getOperand(1).getOpcode() == ISD::Constant) { uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue(); - uint64_t c2 = N1C->getZExtValue(); - SDValue Mask = DAG.getNode(ISD::AND, N0.getDebugLoc(), VT, N0.getOperand(0), - DAG.getConstant(~0ULL << c1, VT)); - if (c2 > c1) - return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, Mask, - DAG.getConstant(c2-c1, N1.getValueType())); - else - return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, Mask, - DAG.getConstant(c1-c2, N1.getValueType())); + if (c1 < VT.getSizeInBits()) { + uint64_t c2 = N1C->getZExtValue(); + SDValue HiBitsMask = + DAG.getConstant(APInt::getHighBitsSet(VT.getSizeInBits(), + VT.getSizeInBits() - c1), + VT); + SDValue Mask = DAG.getNode(ISD::AND, N0.getDebugLoc(), VT, + N0.getOperand(0), + HiBitsMask); + if (c2 > c1) + return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, Mask, + DAG.getConstant(c2-c1, N1.getValueType())); + else + return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, Mask, + DAG.getConstant(c1-c2, N1.getValueType())); + } } // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1)) - if (N1C && N0.getOpcode() == ISD::SRA 
&& N1 == N0.getOperand(1)) + if (N1C && N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1)) { + SDValue HiBitsMask = + DAG.getConstant(APInt::getHighBitsSet(VT.getSizeInBits(), + VT.getSizeInBits() - + N1C->getZExtValue()), + VT); return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0.getOperand(0), - DAG.getConstant(~0ULL << N1C->getZExtValue(), VT)); + HiBitsMask); + } return N1C ? visitShiftByConstant(N, N1C->getZExtValue()) : SDValue(); } @@ -2497,7 +2528,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); - MVT VT = N0.getValueType(); + EVT VT = N0.getValueType(); // fold (sra c1, c2) -> (sra c1, c2) if (N0C && N1C) @@ -2518,7 +2549,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { // sext_inreg. if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) { unsigned LowBits = VT.getSizeInBits() - (unsigned)N1C->getZExtValue(); - MVT EVT = MVT::getIntegerVT(LowBits); + EVT EVT = EVT::getIntegerVT(*DAG.getContext(), LowBits); if ((!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, EVT))) return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, N0.getOperand(0), DAG.getValueType(EVT)); @@ -2545,8 +2576,8 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { if (N01C && N1C) { // Determine what the truncate's result bitsize and type would be. unsigned VTValSize = VT.getSizeInBits(); - MVT TruncVT = - MVT::getIntegerVT(VTValSize - N1C->getZExtValue()); + EVT TruncVT = + EVT::getIntegerVT(*DAG.getContext(), VTValSize - N1C->getZExtValue()); // Determine the residual right-shift amount. 
signed ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue(); @@ -2576,7 +2607,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { N1.hasOneUse() && N1.getOperand(0).hasOneUse()) { SDValue N101 = N1.getOperand(0).getOperand(1); if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) { - MVT TruncVT = N1.getValueType(); + EVT TruncVT = N1.getValueType(); SDValue N100 = N1.getOperand(0).getOperand(0); APInt TruncC = N101C->getAPIntValue(); TruncC.trunc(TruncVT.getSizeInBits()); @@ -2607,7 +2638,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); - MVT VT = N0.getValueType(); + EVT VT = N0.getValueType(); unsigned OpSizeInBits = VT.getSizeInBits(); // fold (srl c1, c2) -> c1 >>u c2 @@ -2641,7 +2672,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { // fold (srl (anyextend x), c) -> (anyextend (srl x, c)) if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) { // Shifting in all undef bits? 
- MVT SmallVT = N0.getOperand(0).getValueType(); + EVT SmallVT = N0.getOperand(0).getValueType(); if (N1C->getZExtValue() >= SmallVT.getSizeInBits()) return DAG.getUNDEF(VT); @@ -2700,7 +2731,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { N1.hasOneUse() && N1.getOperand(0).hasOneUse()) { SDValue N101 = N1.getOperand(0).getOperand(1); if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) { - MVT TruncVT = N1.getValueType(); + EVT TruncVT = N1.getValueType(); SDValue N100 = N1.getOperand(0).getOperand(0); APInt TruncC = N101C->getAPIntValue(); TruncC.trunc(TruncVT.getSizeInBits()); @@ -2724,7 +2755,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { SDValue DAGCombiner::visitCTLZ(SDNode *N) { SDValue N0 = N->getOperand(0); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold (ctlz c1) -> c2 if (isa<ConstantSDNode>(N0)) @@ -2734,7 +2765,7 @@ SDValue DAGCombiner::visitCTLZ(SDNode *N) { SDValue DAGCombiner::visitCTTZ(SDNode *N) { SDValue N0 = N->getOperand(0); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold (cttz c1) -> c2 if (isa<ConstantSDNode>(N0)) @@ -2744,7 +2775,7 @@ SDValue DAGCombiner::visitCTTZ(SDNode *N) { SDValue DAGCombiner::visitCTPOP(SDNode *N) { SDValue N0 = N->getOperand(0); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold (ctpop c1) -> c2 if (isa<ConstantSDNode>(N0)) @@ -2759,8 +2790,8 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2); - MVT VT = N->getValueType(0); - MVT VT0 = N0.getValueType(); + EVT VT = N->getValueType(0); + EVT VT0 = N0.getValueType(); // fold (select C, X, X) -> X if (N1 == N2) @@ -2825,7 +2856,8 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { // Check against MVT::Other for SELECT_CC, which is a workaround for targets // having to say they don't support SELECT_CC on every type the DAG knows // about, since there is 
no way to mark an opcode illegal at all value types - if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other)) + if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other) && + TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), VT, N0.getOperand(0), N0.getOperand(1), N1, N2, N0.getOperand(2)); @@ -2945,7 +2977,7 @@ static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0, SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold (sext c1) -> c1 if (isa<ConstantSDNode>(N0)) @@ -3054,13 +3086,13 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { if ((ISD::isSEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) && ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); - MVT EVT = LN0->getMemoryVT(); + EVT MemVT = LN0->getMemoryVT(); if ((!LegalOperations && !LN0->isVolatile()) || - TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT)) { + TLI.isLoadExtLegal(ISD::SEXTLOAD, MemVT)) { SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT, LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), - LN0->getSrcValueOffset(), EVT, + LN0->getSrcValueOffset(), MemVT, LN0->isVolatile(), LN0->getAlignment()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), @@ -3071,14 +3103,34 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { } } - // sext(setcc x, y, cc) -> (select_cc x, y, -1, 0, cc) if (N0.getOpcode() == ISD::SETCC) { + // sext(setcc) -> sext_in_reg(vsetcc) for vectors. + if (VT.isVector() && + // We know that the # elements of the results is the same as the + // # elements of the compare (and the # elements of the compare result + // for that matter). Check to see that they are the same size. If so, + // we know that the element size of the sext'd result matches the + // element size of the compare operands. 
+ VT.getSizeInBits() == N0.getOperand(0).getValueType().getSizeInBits() && + + // Only do this before legalize for now. + !LegalOperations) { + return DAG.getVSetCC(N->getDebugLoc(), VT, N0.getOperand(0), + N0.getOperand(1), + cast<CondCodeSDNode>(N0.getOperand(2))->get()); + } + + // sext(setcc x, y, cc) -> (select_cc x, y, -1, 0, cc) + SDValue NegOne = + DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT); SDValue SCC = SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1), - DAG.getConstant(~0ULL, VT), DAG.getConstant(0, VT), + NegOne, DAG.getConstant(0, VT), cast<CondCodeSDNode>(N0.getOperand(2))->get(), true); if (SCC.getNode()) return SCC; } + + // fold (sext x) -> (zext x) if the sign bit is known zero. if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) && @@ -3090,7 +3142,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold (zext c1) -> c1 if (isa<ConstantSDNode>(N0)) @@ -3194,13 +3246,13 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { if ((ISD::isZEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) && ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); - MVT EVT = LN0->getMemoryVT(); + EVT MemVT = LN0->getMemoryVT(); if ((!LegalOperations && !LN0->isVolatile()) || - TLI.isLoadExtLegal(ISD::ZEXTLOAD, EVT)) { + TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT)) { SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N->getDebugLoc(), VT, LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), - LN0->getSrcValueOffset(), EVT, + LN0->getSrcValueOffset(), MemVT, LN0->isVolatile(), LN0->getAlignment()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), @@ -3225,7 +3277,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); - MVT VT = 
N->getValueType(0); + EVT VT = N->getValueType(0); // fold (aext c1) -> c1 if (isa<ConstantSDNode>(N0)) @@ -3330,11 +3382,11 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { !ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); - MVT EVT = LN0->getMemoryVT(); + EVT MemVT = LN0->getMemoryVT(); SDValue ExtLoad = DAG.getExtLoad(LN0->getExtensionType(), N->getDebugLoc(), VT, LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), - LN0->getSrcValueOffset(), EVT, + LN0->getSrcValueOffset(), MemVT, LN0->isVolatile(), LN0->getAlignment()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), @@ -3400,8 +3452,8 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { unsigned Opc = N->getOpcode(); ISD::LoadExtType ExtType = ISD::NON_EXTLOAD; SDValue N0 = N->getOperand(0); - MVT VT = N->getValueType(0); - MVT EVT = VT; + EVT VT = N->getValueType(0); + EVT ExtVT = VT; // This transformation isn't valid for vector loads. if (VT.isVector()) @@ -3411,20 +3463,21 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { // extended to VT. if (Opc == ISD::SIGN_EXTEND_INREG) { ExtType = ISD::SEXTLOAD; - EVT = cast<VTSDNode>(N->getOperand(1))->getVT(); - if (LegalOperations && !TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT)) + ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT(); + if (LegalOperations && !TLI.isLoadExtLegal(ISD::SEXTLOAD, ExtVT)) return SDValue(); } - unsigned EVTBits = EVT.getSizeInBits(); + unsigned EVTBits = ExtVT.getSizeInBits(); unsigned ShAmt = 0; - if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) { + if (N0.getOpcode() == ISD::SRL && N0.hasOneUse() && ExtVT.isRound()) { if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { ShAmt = N01->getZExtValue(); // Is the shift amount a multiple of size of VT? if ((ShAmt & (EVTBits-1)) == 0) { N0 = N0.getOperand(0); - if (N0.getValueType().getSizeInBits() <= EVTBits) + // Is the load width a multiple of size of VT? 
+ if ((N0.getValueType().getSizeInBits() & (EVTBits-1)) != 0) return SDValue(); } } @@ -3432,18 +3485,18 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { // Do not generate loads of non-round integer types since these can // be expensive (and would be wrong if the type is not byte sized). - if (isa<LoadSDNode>(N0) && N0.hasOneUse() && EVT.isRound() && + if (isa<LoadSDNode>(N0) && N0.hasOneUse() && ExtVT.isRound() && cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits() > EVTBits && // Do not change the width of a volatile load. !cast<LoadSDNode>(N0)->isVolatile()) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); - MVT PtrType = N0.getOperand(1).getValueType(); + EVT PtrType = N0.getOperand(1).getValueType(); // For big endian targets, we need to adjust the offset to the pointer to // load the correct bytes. if (TLI.isBigEndian()) { unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits(); - unsigned EVTStoreBits = EVT.getStoreSizeInBits(); + unsigned EVTStoreBits = ExtVT.getStoreSizeInBits(); ShAmt = LVTStoreBits - EVTStoreBits - ShAmt; } @@ -3460,7 +3513,7 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { LN0->isVolatile(), NewAlign) : DAG.getExtLoad(ExtType, N0.getDebugLoc(), VT, LN0->getChain(), NewPtr, LN0->getSrcValue(), LN0->getSrcValueOffset() + PtrOff, - EVT, LN0->isVolatile(), NewAlign); + ExtVT, LN0->isVolatile(), NewAlign); // Replace the old load's chain with the new load's chain. 
WorkListRemover DeadNodes(*this); @@ -3477,8 +3530,8 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - MVT VT = N->getValueType(0); - MVT EVT = cast<VTSDNode>(N1)->getVT(); + EVT VT = N->getValueType(0); + EVT EVT = cast<VTSDNode>(N1)->getVT(); unsigned VTBits = VT.getSizeInBits(); unsigned EVTBits = EVT.getSizeInBits(); @@ -3573,7 +3626,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { SDValue N0 = N->getOperand(0); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // noop truncate if (N0.getValueType() == N->getValueType(0)) @@ -3623,14 +3676,14 @@ static SDNode *getBuildPairElt(SDNode *N, unsigned i) { /// CombineConsecutiveLoads - build_pair (load, load) -> load /// if load locations are consecutive. -SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, MVT VT) { +SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) { assert(N->getOpcode() == ISD::BUILD_PAIR); LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0)); LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1)); if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse()) return SDValue(); - MVT LD1VT = LD1->getValueType(0); + EVT LD1VT = LD1->getValueType(0); const MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); if (ISD::isNON_EXTLoad(LD2) && @@ -3642,7 +3695,7 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, MVT VT) { TLI.isConsecutiveLoad(LD2, LD1, LD1VT.getSizeInBits()/8, 1, MFI)) { unsigned Align = LD1->getAlignment(); unsigned NewAlign = TLI.getTargetData()-> - getABITypeAlignment(VT.getTypeForMVT()); + getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext())); if (NewAlign <= Align && (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) @@ -3656,7 +3709,7 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, MVT VT) { SDValue 
DAGCombiner::visitBIT_CONVERT(SDNode *N) { SDValue N0 = N->getOperand(0); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // If the input is a BUILD_VECTOR with all constant elements, fold this now. // Only do this before legalize, since afterward the target may be depending @@ -3674,7 +3727,7 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) { break; } - MVT DestEltVT = N->getValueType(0).getVectorElementType(); + EVT DestEltVT = N->getValueType(0).getVectorElementType(); assert(!DestEltVT.isVector() && "Element type of vector ValueType must not be vector!"); if (isSimple) @@ -3684,7 +3737,18 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) { // If the input is a constant, let getNode fold it. if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) { SDValue Res = DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), VT, N0); - if (Res.getNode() != N) return Res; + if (Res.getNode() != N) { + if (!LegalOperations || + TLI.isOperationLegal(Res.getNode()->getOpcode(), VT)) + return Res; + + // Folding it resulted in an illegal node, and it's too late to + // do that. Clean up the old node and forego the transformation. + // Ideally this won't happen very often, because instcombine + // and the earlier dagcombine runs (where illegal nodes are + // permitted) should have folded most of them already. 
+ DAG.DeleteNode(Res.getNode()); + } } // (conv (conv x, t1), t2) -> (conv x, t2) @@ -3700,7 +3764,7 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) { (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); unsigned Align = TLI.getTargetData()-> - getABITypeAlignment(VT.getTypeForMVT()); + getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext())); unsigned OrigAlign = LN0->getAlignment(); if (Align <= OrigAlign) { @@ -3743,7 +3807,7 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) { isa<ConstantFPSDNode>(N0.getOperand(0)) && VT.isInteger() && !VT.isVector()) { unsigned OrigXWidth = N0.getOperand(1).getValueType().getSizeInBits(); - MVT IntXVT = MVT::getIntegerVT(OrigXWidth); + EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth); if (TLI.isTypeLegal(IntXVT) || !LegalTypes) { SDValue X = DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(), IntXVT, N0.getOperand(1)); @@ -3791,7 +3855,7 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) { } SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) { - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); return CombineConsecutiveLoads(N, VT); } @@ -3799,8 +3863,8 @@ SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) { /// node with Constant, ConstantFP or Undef operands. DstEltVT indicates the /// destination element value type. SDValue DAGCombiner:: -ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, MVT DstEltVT) { - MVT SrcEltVT = BV->getValueType(0).getVectorElementType(); +ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { + EVT SrcEltVT = BV->getValueType(0).getVectorElementType(); // If this is already the right type, we're done. 
if (SrcEltVT == DstEltVT) return SDValue(BV, 0); @@ -3822,7 +3886,7 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, MVT DstEltVT) { DstEltVT, Op)); AddToWorkList(Ops.back().getNode()); } - MVT VT = MVT::getVectorVT(DstEltVT, + EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, BV->getValueType(0).getVectorNumElements()); return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT, &Ops[0], Ops.size()); @@ -3835,7 +3899,7 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, MVT DstEltVT) { // Convert the input float vector to a int vector where the elements are the // same sizes. assert((SrcEltVT == MVT::f32 || SrcEltVT == MVT::f64) && "Unknown FP VT!"); - MVT IntVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits()); + EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits()); BV = ConstantFoldBIT_CONVERTofBUILD_VECTOR(BV, IntVT).getNode(); SrcEltVT = IntVT; } @@ -3844,7 +3908,7 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, MVT DstEltVT) { // convert to integer first, then to FP of the right size. if (DstEltVT.isFloatingPoint()) { assert((DstEltVT == MVT::f32 || DstEltVT == MVT::f64) && "Unknown FP VT!"); - MVT TmpVT = MVT::getIntegerVT(DstEltVT.getSizeInBits()); + EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits()); SDNode *Tmp = ConstantFoldBIT_CONVERTofBUILD_VECTOR(BV, TmpVT).getNode(); // Next, convert to FP elements of the same size. @@ -3880,7 +3944,7 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, MVT DstEltVT) { Ops.push_back(DAG.getConstant(NewBits, DstEltVT)); } - MVT VT = MVT::getVectorVT(DstEltVT, Ops.size()); + EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size()); return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT, &Ops[0], Ops.size()); } @@ -3889,7 +3953,8 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, MVT DstEltVT) { // turns into multiple outputs. 
bool isS2V = ISD::isScalarToVector(BV); unsigned NumOutputsPerInput = SrcBitSize/DstBitSize; - MVT VT = MVT::getVectorVT(DstEltVT, NumOutputsPerInput*BV->getNumOperands()); + EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, + NumOutputsPerInput*BV->getNumOperands()); SmallVector<SDValue, 8> Ops; for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) { @@ -3926,7 +3991,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold vector ops if (VT.isVector()) { @@ -3967,7 +4032,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold vector ops if (VT.isVector()) { @@ -4001,7 +4066,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold vector ops if (VT.isVector()) { @@ -4024,7 +4089,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { // fold (fmul X, 2.0) -> (fadd X, X) if (N1CFP && N1CFP->isExactlyValue(+2.0)) return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N0); - // fold (fmul X, (fneg 1.0)) -> (fneg X) + // fold (fmul X, -1.0) -> (fneg X) if (N1CFP && N1CFP->isExactlyValue(-1.0)) if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N0); @@ -4056,7 +4121,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); - MVT VT = N->getValueType(0); 
+ EVT VT = N->getValueType(0); // fold vector ops if (VT.isVector()) { @@ -4089,7 +4154,7 @@ SDValue DAGCombiner::visitFREM(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold (frem c1, c2) -> fmod(c1,c2) if (N0CFP && N1CFP && VT != MVT::ppcf128) @@ -4103,7 +4168,7 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); if (N0CFP && N1CFP && VT != MVT::ppcf128) // Constant fold return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT, N0, N1); @@ -4151,8 +4216,8 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { SDValue N0 = N->getOperand(0); ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); - MVT VT = N->getValueType(0); - MVT OpVT = N0.getValueType(); + EVT VT = N->getValueType(0); + EVT OpVT = N0.getValueType(); // fold (sint_to_fp c1) -> c1fp if (N0C && OpVT != MVT::ppcf128) @@ -4173,8 +4238,8 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { SDValue N0 = N->getOperand(0); ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); - MVT VT = N->getValueType(0); - MVT OpVT = N0.getValueType(); + EVT VT = N->getValueType(0); + EVT OpVT = N0.getValueType(); // fold (uint_to_fp c1) -> c1fp if (N0C && OpVT != MVT::ppcf128) @@ -4195,7 +4260,7 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) { SDValue N0 = N->getOperand(0); ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold (fp_to_sint c1fp) -> c1 if (N0CFP) @@ -4207,7 +4272,7 @@ SDValue 
DAGCombiner::visitFP_TO_SINT(SDNode *N) { SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) { SDValue N0 = N->getOperand(0); ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold (fp_to_uint c1fp) -> c1 if (N0CFP && VT != MVT::ppcf128) @@ -4220,7 +4285,7 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold (fp_round c1fp) -> c1fp if (N0CFP && N0.getValueType() != MVT::ppcf128) @@ -4253,8 +4318,8 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) { SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) { SDValue N0 = N->getOperand(0); - MVT VT = N->getValueType(0); - MVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT(); + EVT VT = N->getValueType(0); + EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT(); ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); // fold (fp_round_inreg c1fp) -> c1fp @@ -4269,7 +4334,7 @@ SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) { SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded. 
if (N->hasOneUse() && @@ -4326,7 +4391,7 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { N0.getOperand(0).getValueType().isInteger() && !N0.getOperand(0).getValueType().isVector()) { SDValue Int = N0.getOperand(0); - MVT IntVT = Int.getValueType(); + EVT IntVT = Int.getValueType(); if (IntVT.isInteger() && !IntVT.isVector()) { Int = DAG.getNode(ISD::XOR, N0.getDebugLoc(), IntVT, Int, DAG.getConstant(APInt::getSignBit(IntVT.getSizeInBits()), IntVT)); @@ -4342,7 +4407,7 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { SDValue DAGCombiner::visitFABS(SDNode *N) { SDValue N0 = N->getOperand(0); ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold (fabs c1) -> fabs(c1) if (N0CFP && VT != MVT::ppcf128) @@ -4361,7 +4426,7 @@ SDValue DAGCombiner::visitFABS(SDNode *N) { N0.getOperand(0).getValueType().isInteger() && !N0.getOperand(0).getValueType().isVector()) { SDValue Int = N0.getOperand(0); - MVT IntVT = Int.getValueType(); + EVT IntVT = Int.getValueType(); if (IntVT.isInteger() && !IntVT.isVector()) { Int = DAG.getNode(ISD::AND, N0.getDebugLoc(), IntVT, Int, DAG.getConstant(~APInt::getSignBit(IntVT.getSizeInBits()), IntVT)); @@ -4419,7 +4484,6 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { if (Op0.getOpcode() == ISD::AND && Op0.hasOneUse() && Op1.getOpcode() == ISD::Constant) { - SDValue AndOp0 = Op0.getOperand(0); SDValue AndOp1 = Op0.getOperand(1); if (AndOp1.getOpcode() == ISD::Constant) { @@ -4491,7 +4555,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { bool isLoad = true; SDValue Ptr; - MVT VT; + EVT VT; if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { if (LD->isIndexed()) return false; @@ -4579,9 +4643,11 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { BasePtr, Offset, AM); ++PreIndexedNodes; ++NodesCombined; - DOUT << "\nReplacing.4 "; DEBUG(N->dump(&DAG)); - DOUT << "\nWith: "; DEBUG(Result.getNode()->dump(&DAG)); - DOUT << '\n'; + DEBUG(errs() << 
"\nReplacing.4 "; + N->dump(&DAG); + errs() << "\nWith: "; + Result.getNode()->dump(&DAG); + errs() << '\n'); WorkListRemover DeadNodes(*this); if (isLoad) { DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0), @@ -4616,7 +4682,7 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { bool isLoad = true; SDValue Ptr; - MVT VT; + EVT VT; if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { if (LD->isIndexed()) return false; @@ -4652,7 +4718,7 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { SDValue Offset; ISD::MemIndexedMode AM = ISD::UNINDEXED; if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) { - if (Ptr == Offset) + if (Ptr == Offset && Op->getOpcode() == ISD::ADD) std::swap(BasePtr, Offset); if (Ptr != BasePtr) continue; @@ -4711,9 +4777,11 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { BasePtr, Offset, AM); ++PostIndexedNodes; ++NodesCombined; - DOUT << "\nReplacing.5 "; DEBUG(N->dump(&DAG)); - DOUT << "\nWith: "; DEBUG(Result.getNode()->dump(&DAG)); - DOUT << '\n'; + DEBUG(errs() << "\nReplacing.5 "; + N->dump(&DAG); + errs() << "\nWith: "; + Result.getNode()->dump(&DAG); + errs() << '\n'); WorkListRemover DeadNodes(*this); if (isLoad) { DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0), @@ -4815,9 +4883,11 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { // v3 = add v2, c // Now we replace use of chain2 with chain1. This makes the second load // isomorphic to the one we are deleting, and thus makes this load live. 
- DOUT << "\nReplacing.6 "; DEBUG(N->dump(&DAG)); - DOUT << "\nWith chain: "; DEBUG(Chain.getNode()->dump(&DAG)); - DOUT << "\n"; + DEBUG(errs() << "\nReplacing.6 "; + N->dump(&DAG); + errs() << "\nWith chain: "; + Chain.getNode()->dump(&DAG); + errs() << "\n"); WorkListRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain, &DeadNodes); @@ -4833,9 +4903,11 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?"); if (N->hasNUsesOfValue(0, 0) && N->hasNUsesOfValue(0, 1)) { SDValue Undef = DAG.getUNDEF(N->getValueType(0)); - DOUT << "\nReplacing.6 "; DEBUG(N->dump(&DAG)); - DOUT << "\nWith: "; DEBUG(Undef.getNode()->dump(&DAG)); - DOUT << " and 2 other values\n"; + DEBUG(errs() << "\nReplacing.6 "; + N->dump(&DAG); + errs() << "\nWith: "; + Undef.getNode()->dump(&DAG); + errs() << " and 2 other values\n"); WorkListRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef, &DeadNodes); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), @@ -4890,7 +4962,10 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { // Create token factor to keep old chain connected. SDValue Token = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other, Chain, ReplLoad.getValue(1)); - + + // Make sure the new and old chains are cleaned up. + AddToWorkList(Token.getNode()); + // Replace uses with load result and token factor. Don't add users // to work list. 
return CombineTo(N, ReplLoad.getValue(0), Token, false); @@ -4917,7 +4992,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { SDValue Chain = ST->getChain(); SDValue Value = ST->getValue(); SDValue Ptr = ST->getBasePtr(); - MVT VT = Value.getValueType(); + EVT VT = Value.getValueType(); if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse()) return SDValue(); @@ -4944,12 +5019,12 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { unsigned ShAmt = Imm.countTrailingZeros(); unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1; unsigned NewBW = NextPowerOf2(MSB - ShAmt); - MVT NewVT = MVT::getIntegerVT(NewBW); + EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW); while (NewBW < BitWidth && !(TLI.isOperationLegalOrCustom(Opc, NewVT) && TLI.isNarrowingProfitable(VT, NewVT))) { NewBW = NextPowerOf2(NewBW); - NewVT = MVT::getIntegerVT(NewBW); + NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW); } if (NewBW >= BitWidth) return SDValue(); @@ -4971,7 +5046,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff); if (NewAlign < - TLI.getTargetData()->getABITypeAlignment(NewVT.getTypeForMVT())) + TLI.getTargetData()->getABITypeAlignment(NewVT.getTypeForEVT(*DAG.getContext()))) return SDValue(); SDValue NewPtr = DAG.getNode(ISD::ADD, LD->getDebugLoc(), @@ -5024,9 +5099,9 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { if (Value.getOpcode() == ISD::BIT_CONVERT && !ST->isTruncatingStore() && ST->isUnindexed()) { unsigned OrigAlign = ST->getAlignment(); - MVT SVT = Value.getOperand(0).getValueType(); + EVT SVT = Value.getOperand(0).getValueType(); unsigned Align = TLI.getTargetData()-> - getABITypeAlignment(SVT.getTypeForMVT()); + getABITypeAlignment(SVT.getTypeForEVT(*DAG.getContext())); if (Align <= OrigAlign && ((!LegalOperations && !ST->isVolatile()) || TLI.isOperationLegalOrCustom(ISD::STORE, SVT))) @@ -5043,8 +5118,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // 
transform should not be done in this case. if (Value.getOpcode() != ISD::TargetConstantFP) { SDValue Tmp; - switch (CFP->getValueType(0).getSimpleVT()) { - default: assert(0 && "Unknown FP type"); + switch (CFP->getValueType(0).getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unknown FP type"); case MVT::f80: // We don't do this for these yet. case MVT::f128: case MVT::ppcf128: @@ -5111,8 +5186,9 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // If there is a better chain. if (Chain != BetterChain) { - // Replace the chain to avoid dependency. SDValue ReplStore; + + // Replace the chain to avoid dependency. if (ST->isTruncatingStore()) { ReplStore = DAG.getTruncStore(BetterChain, N->getDebugLoc(), Value, Ptr, ST->getSrcValue(),ST->getSrcValueOffset(), @@ -5128,6 +5204,9 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { SDValue Token = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other, Chain, ReplStore); + // Make sure the new and old chains are cleaned up. + AddToWorkList(Token.getNode()); + // Don't add users to work list. return CombineTo(N, Token, false); } @@ -5211,10 +5290,10 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { // BUILD_VECTOR with undef elements and the inserted element. if (!LegalOperations && InVec.getOpcode() == ISD::UNDEF && isa<ConstantSDNode>(EltNo)) { - MVT VT = InVec.getValueType(); - MVT EVT = VT.getVectorElementType(); + EVT VT = InVec.getValueType(); + EVT EltVT = VT.getVectorElementType(); unsigned NElts = VT.getVectorNumElements(); - SmallVector<SDValue, 8> Ops(NElts, DAG.getUNDEF(EVT)); + SmallVector<SDValue, 8> Ops(NElts, DAG.getUNDEF(EltVT)); unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); if (Elt < Ops.size()) @@ -5232,7 +5311,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) { // If the operand is wider than the vector element type then it is implicitly // truncated. Make that explicit here. 
- MVT EltVT = InVec.getValueType().getVectorElementType(); + EVT EltVT = InVec.getValueType().getVectorElementType(); SDValue InOp = InVec.getOperand(0); if (InOp.getValueType() != EltVT) return DAG.getNode(ISD::TRUNCATE, InVec.getDebugLoc(), EltVT, InOp); @@ -5252,18 +5331,18 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); bool NewLoad = false; bool BCNumEltsChanged = false; - MVT VT = InVec.getValueType(); - MVT EVT = VT.getVectorElementType(); - MVT LVT = EVT; + EVT VT = InVec.getValueType(); + EVT ExtVT = VT.getVectorElementType(); + EVT LVT = ExtVT; if (InVec.getOpcode() == ISD::BIT_CONVERT) { - MVT BCVT = InVec.getOperand(0).getValueType(); - if (!BCVT.isVector() || EVT.bitsGT(BCVT.getVectorElementType())) + EVT BCVT = InVec.getOperand(0).getValueType(); + if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType())) return SDValue(); if (VT.getVectorNumElements() != BCVT.getVectorNumElements()) BCNumEltsChanged = true; InVec = InVec.getOperand(0); - EVT = BCVT.getVectorElementType(); + ExtVT = BCVT.getVectorElementType(); NewLoad = true; } @@ -5272,7 +5351,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { if (ISD::isNormalLoad(InVec.getNode())) { LN0 = cast<LoadSDNode>(InVec); } else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR && - InVec.getOperand(0).getValueType() == EVT && + InVec.getOperand(0).getValueType() == ExtVT && ISD::isNormalLoad(InVec.getOperand(0).getNode())) { LN0 = cast<LoadSDNode>(InVec.getOperand(0)); } else if ((SVN = dyn_cast<ShuffleVectorSDNode>(InVec))) { @@ -5306,7 +5385,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // Check the resultant load doesn't need a higher alignment than the // original load. 
unsigned NewAlign = - TLI.getTargetData()->getABITypeAlignment(LVT.getTypeForMVT()); + TLI.getTargetData()->getABITypeAlignment(LVT.getTypeForEVT(*DAG.getContext())); if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, LVT)) return SDValue(); @@ -5317,7 +5396,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { SDValue NewPtr = LN0->getBasePtr(); if (Elt) { unsigned PtrOff = LVT.getSizeInBits() * Elt / 8; - MVT PtrType = NewPtr.getValueType(); + EVT PtrType = NewPtr.getValueType(); if (TLI.isBigEndian()) PtrOff = VT.getSizeInBits() / 8 - PtrOff; NewPtr = DAG.getNode(ISD::ADD, N->getDebugLoc(), PtrType, NewPtr, @@ -5334,8 +5413,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { unsigned NumInScalars = N->getNumOperands(); - MVT VT = N->getValueType(0); - MVT EltType = VT.getVectorElementType(); + EVT VT = N->getValueType(0); // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT // operations. If so, and if the EXTRACT_VECTOR_ELT vector inputs come from @@ -5432,11 +5510,10 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { return SDValue(); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); unsigned NumElts = VT.getVectorNumElements(); SDValue N0 = N->getOperand(0); - SDValue N1 = N->getOperand(1); assert(N0.getValueType().getVectorNumElements() == NumElts && "Vector shuffle must be normalized in DAG"); @@ -5494,7 +5571,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. 
==> /// vector_shuffle V, Zero, <0, 4, 2, 4> SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); DebugLoc dl = N->getDebugLoc(); SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); @@ -5517,14 +5594,14 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { } // Let's see if the target supports this vector_shuffle. - MVT RVT = RHS.getValueType(); + EVT RVT = RHS.getValueType(); if (!TLI.isVectorClearMaskLegal(Indices, RVT)) return SDValue(); // Return the new VECTOR_SHUFFLE node. - MVT EVT = RVT.getVectorElementType(); + EVT EltVT = RVT.getVectorElementType(); SmallVector<SDValue,8> ZeroOps(RVT.getVectorNumElements(), - DAG.getConstant(0, EVT)); + DAG.getConstant(0, EltVT)); SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), RVT, &ZeroOps[0], ZeroOps.size()); LHS = DAG.getNode(ISD::BIT_CONVERT, dl, RVT, LHS); @@ -5543,10 +5620,10 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { // things. Simplifying them may result in a loss of legality. if (LegalOperations) return SDValue(); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); assert(VT.isVector() && "SimplifyVBinOp only works on vectors!"); - MVT EltType = VT.getVectorElementType(); + EVT EltType = VT.getVectorElementType(); SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); SDValue Shuffle = XformToShuffleWithZero(N); @@ -5589,7 +5666,7 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { } if (Ops.size() == LHS.getNumOperands()) { - MVT VT = LHS.getValueType(); + EVT VT = LHS.getValueType(); return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT, &Ops[0], Ops.size()); } @@ -5728,7 +5805,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, // (x ? y : y) -> y. 
if (N2 == N3) return N2; - MVT VT = N2.getValueType(); + EVT VT = N2.getValueType(); ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode()); ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N3.getNode()); @@ -5820,8 +5897,8 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, N2.getValueType().isInteger() && (N1C->isNullValue() || // (a < 0) ? b : 0 (N1C->getAPIntValue() == 1 && N0 == N2))) { // (a < 1) ? a : 0 - MVT XType = N0.getValueType(); - MVT AType = N2.getValueType(); + EVT XType = N0.getValueType(); + EVT AType = N2.getValueType(); if (XType.bitsGE(AType)) { // and (sra X, size(X)-1, A) -> "and (srl X, C2), A" iff A is a // single-bit constant. @@ -5900,7 +5977,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, // FIXME: Turn all of these into setcc if setcc if setcc is legal // otherwise, go ahead with the folds. if (0 && N3C && N3C->isNullValue() && N2C && (N2C->getAPIntValue() == 1ULL)) { - MVT XType = N0.getValueType(); + EVT XType = N0.getValueType(); if (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, TLI.getSetCCResultType(XType))) { SDValue Res = DAG.getSetCC(DL, TLI.getSetCCResultType(XType), N0, N1, CC); @@ -5942,7 +6019,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, if (N1C && N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE) && N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1) && N2.getOperand(0) == N1 && N0.getValueType().isInteger()) { - MVT XType = N0.getValueType(); + EVT XType = N0.getValueType(); SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(), XType, N0, DAG.getConstant(XType.getSizeInBits()-1, getShiftAmountTy())); @@ -5957,7 +6034,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, if (N1C && N1C->isAllOnesValue() && CC == ISD::SETGT && N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1)) { if 
(ConstantSDNode *SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0))) { - MVT XType = N0.getValueType(); + EVT XType = N0.getValueType(); if (SubC->isNullValue() && XType.isInteger()) { SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(), XType, N0, @@ -5976,11 +6053,11 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, } /// SimplifySetCC - This is a stub for TargetLowering::SimplifySetCC. -SDValue DAGCombiner::SimplifySetCC(MVT VT, SDValue N0, +SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, DebugLoc DL, bool foldBooleans) { TargetLowering::DAGCombinerInfo - DagCombineInfo(DAG, Level == Unrestricted, false, this); + DagCombineInfo(DAG, !LegalTypes, !LegalOperations, false, this); return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL); } @@ -6012,11 +6089,12 @@ SDValue DAGCombiner::BuildUDIV(SDNode *N) { return S; } -/// FindBaseOffset - Return true if base is known not to alias with anything -/// but itself. Provides base object and offset as results. -static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset) { +/// FindBaseOffset - Return true if base is a frame index, which is known not +// to alias with anything but itself. Provides base object and offset as results. +static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset, + GlobalValue *&GV, void *&CV) { // Assume it is a primitive operation. - Base = Ptr; Offset = 0; + Base = Ptr; Offset = 0; GV = 0; CV = 0; // If it's an adding a simple constant then integrate the offset. if (Base.getOpcode() == ISD::ADD) { @@ -6025,36 +6103,73 @@ static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset) { Offset += C->getZExtValue(); } } + + // Return the underlying GlobalValue, and update the Offset. Return false + // for GlobalAddressSDNode since the same GlobalAddress may be represented + // by multiple nodes with different offsets. 
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Base)) { + GV = G->getGlobal(); + Offset += G->getOffset(); + return false; + } + // Return the underlying Constant value, and update the Offset. Return false + // for ConstantSDNodes since the same constant pool entry may be represented + // by multiple nodes with different offsets. + if (ConstantPoolSDNode *C = dyn_cast<ConstantPoolSDNode>(Base)) { + CV = C->isMachineConstantPoolEntry() ? (void *)C->getMachineCPVal() + : (void *)C->getConstVal(); + Offset += C->getOffset(); + return false; + } // If it's any of the following then it can't alias with anything but itself. - return isa<FrameIndexSDNode>(Base) || - isa<ConstantPoolSDNode>(Base) || - isa<GlobalAddressSDNode>(Base); + return isa<FrameIndexSDNode>(Base); } /// isAlias - Return true if there is any possibility that the two addresses /// overlap. bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, const Value *SrcValue1, int SrcValueOffset1, + unsigned SrcValueAlign1, SDValue Ptr2, int64_t Size2, - const Value *SrcValue2, int SrcValueOffset2) const { + const Value *SrcValue2, int SrcValueOffset2, + unsigned SrcValueAlign2) const { // If they are the same then they must be aliases. if (Ptr1 == Ptr2) return true; // Gather base node and offset information. SDValue Base1, Base2; int64_t Offset1, Offset2; - bool KnownBase1 = FindBaseOffset(Ptr1, Base1, Offset1); - bool KnownBase2 = FindBaseOffset(Ptr2, Base2, Offset2); + GlobalValue *GV1, *GV2; + void *CV1, *CV2; + bool isFrameIndex1 = FindBaseOffset(Ptr1, Base1, Offset1, GV1, CV1); + bool isFrameIndex2 = FindBaseOffset(Ptr2, Base2, Offset2, GV2, CV2); - // If they have a same base address then... - if (Base1 == Base2) - // Check to see if the addresses overlap. + // If they have a same base address then check to see if they overlap. 
+ if (Base1 == Base2 || (GV1 && (GV1 == GV2)) || (CV1 && (CV1 == CV2))) return !((Offset1 + Size1) <= Offset2 || (Offset2 + Size2) <= Offset1); - // If we know both bases then they can't alias. - if (KnownBase1 && KnownBase2) return false; + // If we know what the bases are, and they aren't identical, then we know they + // cannot alias. + if ((isFrameIndex1 || CV1 || GV1) && (isFrameIndex2 || CV2 || GV2)) + return false; + // If we know required SrcValue1 and SrcValue2 have relatively large alignment + // compared to the size and offset of the access, we may be able to prove they + // do not alias. This check is conservative for now to catch cases created by + // splitting vector types. + if ((SrcValueAlign1 == SrcValueAlign2) && + (SrcValueOffset1 != SrcValueOffset2) && + (Size1 == Size2) && (SrcValueAlign1 > Size1)) { + int64_t OffAlign1 = SrcValueOffset1 % SrcValueAlign1; + int64_t OffAlign2 = SrcValueOffset2 % SrcValueAlign1; + + // There is no overlap between these relatively aligned accesses of similar + // size, return no alias. + if ((OffAlign1 + Size1) <= OffAlign2 || (OffAlign2 + Size2) <= OffAlign1) + return false; + } + if (CombinerGlobalAA) { // Use alias analysis information. int64_t MinOffset = std::min(SrcValueOffset1, SrcValueOffset2); @@ -6074,20 +6189,24 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, /// node. Returns true if the operand was a load. 
bool DAGCombiner::FindAliasInfo(SDNode *N, SDValue &Ptr, int64_t &Size, - const Value *&SrcValue, int &SrcValueOffset) const { + const Value *&SrcValue, + int &SrcValueOffset, + unsigned &SrcValueAlign) const { if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { Ptr = LD->getBasePtr(); Size = LD->getMemoryVT().getSizeInBits() >> 3; SrcValue = LD->getSrcValue(); SrcValueOffset = LD->getSrcValueOffset(); + SrcValueAlign = LD->getOriginalAlignment(); return true; } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { Ptr = ST->getBasePtr(); Size = ST->getMemoryVT().getSizeInBits() >> 3; SrcValue = ST->getSrcValue(); SrcValueOffset = ST->getSrcValueOffset(); + SrcValueAlign = ST->getOriginalAlignment(); } else { - assert(0 && "FindAliasInfo expected a memory operand"); + llvm_unreachable("FindAliasInfo expected a memory operand"); } return false; @@ -6098,28 +6217,45 @@ bool DAGCombiner::FindAliasInfo(SDNode *N, void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, SmallVector<SDValue, 8> &Aliases) { SmallVector<SDValue, 8> Chains; // List of chains to visit. - std::set<SDNode *> Visited; // Visited node set. + SmallPtrSet<SDNode *, 16> Visited; // Visited node set. // Get alias information for node. SDValue Ptr; - int64_t Size = 0; - const Value *SrcValue = 0; - int SrcValueOffset = 0; - bool IsLoad = FindAliasInfo(N, Ptr, Size, SrcValue, SrcValueOffset); + int64_t Size; + const Value *SrcValue; + int SrcValueOffset; + unsigned SrcValueAlign; + bool IsLoad = FindAliasInfo(N, Ptr, Size, SrcValue, SrcValueOffset, + SrcValueAlign); // Starting off. Chains.push_back(OriginalChain); - + unsigned Depth = 0; + // Look at each chain and determine if it is an alias. If so, add it to the // aliases list. If not, then continue up the chain looking for the next // candidate. while (!Chains.empty()) { SDValue Chain = Chains.back(); Chains.pop_back(); + + // For TokenFactor nodes, look at each operand and only continue up the + // chain until we find two aliases. 
If we've seen two aliases, assume we'll + // find more and revert to original chain since the xform is unlikely to be + // profitable. + // + // FIXME: The depth check could be made to return the last non-aliasing + // chain we found before we hit a tokenfactor rather than the original + // chain. + if (Depth > 6 || Aliases.size() == 2) { + Aliases.clear(); + Aliases.push_back(OriginalChain); + break; + } - // Don't bother if we've been before. - if (Visited.find(Chain.getNode()) != Visited.end()) continue; - Visited.insert(Chain.getNode()); + // Don't bother if we've been before. + if (!Visited.insert(Chain.getNode())) + continue; switch (Chain.getOpcode()) { case ISD::EntryToken: @@ -6130,35 +6266,40 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, case ISD::STORE: { // Get alias information for Chain. SDValue OpPtr; - int64_t OpSize = 0; - const Value *OpSrcValue = 0; - int OpSrcValueOffset = 0; + int64_t OpSize; + const Value *OpSrcValue; + int OpSrcValueOffset; + unsigned OpSrcValueAlign; bool IsOpLoad = FindAliasInfo(Chain.getNode(), OpPtr, OpSize, - OpSrcValue, OpSrcValueOffset); + OpSrcValue, OpSrcValueOffset, + OpSrcValueAlign); // If chain is alias then stop here. if (!(IsLoad && IsOpLoad) && - isAlias(Ptr, Size, SrcValue, SrcValueOffset, - OpPtr, OpSize, OpSrcValue, OpSrcValueOffset)) { + isAlias(Ptr, Size, SrcValue, SrcValueOffset, SrcValueAlign, + OpPtr, OpSize, OpSrcValue, OpSrcValueOffset, + OpSrcValueAlign)) { Aliases.push_back(Chain); } else { // Look further up the chain. Chains.push_back(Chain.getOperand(0)); - // Clean up old chain. - AddToWorkList(Chain.getNode()); + ++Depth; } break; } case ISD::TokenFactor: - // We have to check each of the operands of the token factor, so we queue - // then up. Adding the operands to the queue (stack) in reverse order - // maintains the original order and increases the likelihood that getNode - // will find a matching token factor (CSE.) 
+ // We have to check each of the operands of the token factor for "small" + // token factors, so we queue them up. Adding the operands to the queue + // (stack) in reverse order maintains the original order and increases the + // likelihood that getNode will find a matching token factor (CSE.) + if (Chain.getNumOperands() > 16) { + Aliases.push_back(Chain); + break; + } for (unsigned n = Chain.getNumOperands(); n;) Chains.push_back(Chain.getOperand(--n)); - // Eliminate the token factor if we can. - AddToWorkList(Chain.getNode()); + ++Depth; break; default: @@ -6184,15 +6325,10 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) { // If a single operand then chain to it. We don't need to revisit it. return Aliases[0]; } - + // Construct a custom tailored token factor. - SDValue NewChain = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other, - &Aliases[0], Aliases.size()); - - // Make sure the old chain gets cleaned up. - if (NewChain != OldChain) AddToWorkList(OldChain.getNode()); - - return NewChain; + return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other, + &Aliases[0], Aliases.size()); } // SelectionDAG::Combine - This is the entry point for the file. diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index cd2d5ac..8e955af 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -57,7 +57,7 @@ using namespace llvm; unsigned FastISel::getRegForValue(Value *V) { - MVT RealVT = TLI.getValueType(V->getType(), /*AllowUnknown=*/true); + EVT RealVT = TLI.getValueType(V->getType(), /*AllowUnknown=*/true); // Don't handle non-simple values in FastISel. if (!RealVT.isSimple()) return 0; @@ -65,11 +65,11 @@ unsigned FastISel::getRegForValue(Value *V) { // Ignore illegal types. We must do this before looking up the value // in ValueMap because Arguments are given virtual registers regardless // of whether FastISel can handle them. 
- MVT::SimpleValueType VT = RealVT.getSimpleVT(); + MVT VT = RealVT.getSimpleVT(); if (!TLI.isTypeLegal(VT)) { // Promote MVT::i1 to a legal type though, because it's common and easy. if (VT == MVT::i1) - VT = TLI.getTypeToTransformTo(VT).getSimpleVT(); + VT = TLI.getTypeToTransformTo(V->getContext(), VT).getSimpleVT(); else return 0; } @@ -92,13 +92,14 @@ unsigned FastISel::getRegForValue(Value *V) { } else if (isa<ConstantPointerNull>(V)) { // Translate this as an integer zero so that it can be // local-CSE'd with actual integer zeros. - Reg = getRegForValue(Constant::getNullValue(TD.getIntPtrType())); + Reg = + getRegForValue(Constant::getNullValue(TD.getIntPtrType(V->getContext()))); } else if (ConstantFP *CF = dyn_cast<ConstantFP>(V)) { Reg = FastEmit_f(VT, VT, ISD::ConstantFP, CF); if (!Reg) { const APFloat &Flt = CF->getValueAPF(); - MVT IntVT = TLI.getPointerTy(); + EVT IntVT = TLI.getPointerTy(); uint64_t x[2]; uint32_t IntBitWidth = IntVT.getSizeInBits(); @@ -108,7 +109,8 @@ unsigned FastISel::getRegForValue(Value *V) { if (isExact) { APInt IntVal(IntBitWidth, 2, x); - unsigned IntegerReg = getRegForValue(ConstantInt::get(IntVal)); + unsigned IntegerReg = + getRegForValue(ConstantInt::get(V->getContext(), IntVal)); if (IntegerReg != 0) Reg = FastEmit_r(IntVT.getSimpleVT(), VT, ISD::SINT_TO_FP, IntegerReg); } @@ -174,13 +176,11 @@ unsigned FastISel::getRegForGEPIndex(Value *Idx) { // If the index is smaller or larger than intptr_t, truncate or extend it. 
MVT PtrVT = TLI.getPointerTy(); - MVT IdxVT = MVT::getMVT(Idx->getType(), /*HandleUnknown=*/false); + EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false); if (IdxVT.bitsLT(PtrVT)) - IdxN = FastEmit_r(IdxVT.getSimpleVT(), PtrVT.getSimpleVT(), - ISD::SIGN_EXTEND, IdxN); + IdxN = FastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::SIGN_EXTEND, IdxN); else if (IdxVT.bitsGT(PtrVT)) - IdxN = FastEmit_r(IdxVT.getSimpleVT(), PtrVT.getSimpleVT(), - ISD::TRUNCATE, IdxN); + IdxN = FastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::TRUNCATE, IdxN); return IdxN; } @@ -188,7 +188,7 @@ unsigned FastISel::getRegForGEPIndex(Value *Idx) { /// which has an opcode which directly corresponds to the given ISD opcode. /// bool FastISel::SelectBinaryOp(User *I, ISD::NodeType ISDOpcode) { - MVT VT = MVT::getMVT(I->getType(), /*HandleUnknown=*/true); + EVT VT = EVT::getEVT(I->getType(), /*HandleUnknown=*/true); if (VT == MVT::Other || !VT.isSimple()) // Unhandled type. Halt "fast" selection and bail. return false; @@ -203,7 +203,7 @@ bool FastISel::SelectBinaryOp(User *I, ISD::NodeType ISDOpcode) { if (VT == MVT::i1 && (ISDOpcode == ISD::AND || ISDOpcode == ISD::OR || ISDOpcode == ISD::XOR)) - VT = TLI.getTypeToTransformTo(VT); + VT = TLI.getTypeToTransformTo(I->getContext(), VT); else return false; } @@ -260,7 +260,7 @@ bool FastISel::SelectGetElementPtr(User *I) { return false; const Type *Ty = I->getOperand(0)->getType(); - MVT::SimpleValueType VT = TLI.getPointerTy().getSimpleVT(); + MVT VT = TLI.getPointerTy(); for (GetElementPtrInst::op_iterator OI = I->op_begin()+1, E = I->op_end(); OI != E; ++OI) { Value *Idx = *OI; @@ -335,7 +335,7 @@ bool FastISel::SelectCall(User *I) { if (isValidDebugInfoIntrinsic(*RSI, CodeGenOpt::None) && DW && DW->ShouldEmitDwarfDebug()) { unsigned ID = - DW->RecordRegionStart(cast<GlobalVariable>(RSI->getContext())); + DW->RecordRegionStart(RSI->getContext()); const TargetInstrDesc &II = TII.get(TargetInstrInfo::DBG_LABEL); BuildMI(MBB, DL, II).addImm(ID); } 
@@ -346,7 +346,7 @@ bool FastISel::SelectCall(User *I) { if (isValidDebugInfoIntrinsic(*REI, CodeGenOpt::None) && DW && DW->ShouldEmitDwarfDebug()) { unsigned ID = 0; - DISubprogram Subprogram(cast<GlobalVariable>(REI->getContext())); + DISubprogram Subprogram(REI->getContext()); if (isInlinedFnEnd(*REI, MF.getFunction())) { // This is end of an inlined function. const TargetInstrDesc &II = TII.get(TargetInstrInfo::DBG_LABEL); @@ -359,7 +359,7 @@ bool FastISel::SelectCall(User *I) { BuildMI(MBB, DL, II).addImm(ID); } else { const TargetInstrDesc &II = TII.get(TargetInstrInfo::DBG_LABEL); - ID = DW->RecordRegionEnd(cast<GlobalVariable>(REI->getContext())); + ID = DW->RecordRegionEnd(REI->getContext()); BuildMI(MBB, DL, II).addImm(ID); } } @@ -384,11 +384,10 @@ bool FastISel::SelectCall(User *I) { setCurDebugLoc(ExtractDebugLocation(*FSI, MF.getDebugLocInfo())); DebugLocTuple PrevLocTpl = MF.getDebugLocTuple(PrevLoc); - DISubprogram SP(cast<GlobalVariable>(FSI->getSubprogram())); - unsigned LabelID = DW->RecordInlinedFnStart(SP, - DICompileUnit(PrevLocTpl.CompileUnit), - PrevLocTpl.Line, - PrevLocTpl.Col); + DISubprogram SP(FSI->getSubprogram()); + unsigned LabelID = + DW->RecordInlinedFnStart(SP,DICompileUnit(PrevLocTpl.Scope), + PrevLocTpl.Line, PrevLocTpl.Col); const TargetInstrDesc &II = TII.get(TargetInstrInfo::DBG_LABEL); BuildMI(MBB, DL, II).addImm(LabelID); return true; @@ -398,7 +397,7 @@ bool FastISel::SelectCall(User *I) { MF.setDefaultDebugLoc(ExtractDebugLocation(*FSI, MF.getDebugLocInfo())); // llvm.dbg.func_start also defines beginning of function scope. 
- DW->RecordRegionStart(cast<GlobalVariable>(FSI->getSubprogram())); + DW->RecordRegionStart(FSI->getSubprogram()); return true; } case Intrinsic::dbg_declare: { @@ -407,7 +406,6 @@ bool FastISel::SelectCall(User *I) { || !DW->ShouldEmitDwarfDebug()) return true; - Value *Variable = DI->getVariable(); Value *Address = DI->getAddress(); if (BitCastInst *BCI = dyn_cast<BitCastInst>(Address)) Address = BCI->getOperand(0); @@ -418,20 +416,15 @@ bool FastISel::SelectCall(User *I) { StaticAllocaMap.find(AI); if (SI == StaticAllocaMap.end()) break; // VLAs. int FI = SI->second; - - // Determine the debug globalvariable. - GlobalValue *GV = cast<GlobalVariable>(Variable); - - // Build the DECLARE instruction. - const TargetInstrDesc &II = TII.get(TargetInstrInfo::DECLARE); - MachineInstr *DeclareMI - = BuildMI(MBB, DL, II).addFrameIndex(FI).addGlobalAddress(GV); - DIVariable DV(cast<GlobalVariable>(GV)); - DW->RecordVariableScope(DV, DeclareMI); + if (MMI) + MMI->setVariableDbgInfo(DI->getVariable(), FI); +#ifndef ATTACH_DEBUG_INFO_TO_AN_INSN + DW->RecordVariable(DI->getVariable(), FI); +#endif return true; } case Intrinsic::eh_exception: { - MVT VT = TLI.getValueType(I->getType()); + EVT VT = TLI.getValueType(I->getType()); switch (TLI.getOperationAction(ISD::EXCEPTIONADDR, VT)) { default: break; case TargetLowering::Expand: { @@ -449,15 +442,11 @@ bool FastISel::SelectCall(User *I) { } break; } - case Intrinsic::eh_selector_i32: - case Intrinsic::eh_selector_i64: { - MVT VT = TLI.getValueType(I->getType()); + case Intrinsic::eh_selector: { + EVT VT = TLI.getValueType(I->getType()); switch (TLI.getOperationAction(ISD::EHSELECTION, VT)) { default: break; case TargetLowering::Expand: { - MVT VT = (IID == Intrinsic::eh_selector_i32 ? 
- MVT::i32 : MVT::i64); - if (MMI) { if (MBB->isLandingPad()) AddCatchInfo(*cast<CallInst>(I), MMI, MBB); @@ -471,12 +460,25 @@ bool FastISel::SelectCall(User *I) { } unsigned Reg = TLI.getExceptionSelectorRegister(); - const TargetRegisterClass *RC = TLI.getRegClassFor(VT); + EVT SrcVT = TLI.getPointerTy(); + const TargetRegisterClass *RC = TLI.getRegClassFor(SrcVT); unsigned ResultReg = createResultReg(RC); - bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, - Reg, RC, RC); + bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, Reg, + RC, RC); assert(InsertedCopy && "Can't copy address registers!"); InsertedCopy = InsertedCopy; + + // Cast the register to the type of the selector. + if (SrcVT.bitsGT(MVT::i32)) + ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32, ISD::TRUNCATE, + ResultReg); + else if (SrcVT.bitsLT(MVT::i32)) + ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32, + ISD::SIGN_EXTEND, ResultReg); + if (ResultReg == 0) + // Unhandled operand. Halt "fast" selection and bail. + return false; + UpdateValueMap(I, ResultReg); } else { unsigned ResultReg = @@ -493,8 +495,8 @@ bool FastISel::SelectCall(User *I) { } bool FastISel::SelectCast(User *I, ISD::NodeType Opcode) { - MVT SrcVT = TLI.getValueType(I->getOperand(0)->getType()); - MVT DstVT = TLI.getValueType(I->getType()); + EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType()); + EVT DstVT = TLI.getValueType(I->getType()); if (SrcVT == MVT::Other || !SrcVT.isSimple() || DstVT == MVT::Other || !DstVT.isSimple()) @@ -524,14 +526,14 @@ bool FastISel::SelectCast(User *I, ISD::NodeType Opcode) { // If the operand is i1, arrange for the high bits in the register to be zero. if (SrcVT == MVT::i1) { - SrcVT = TLI.getTypeToTransformTo(SrcVT); + SrcVT = TLI.getTypeToTransformTo(I->getContext(), SrcVT); InputReg = FastEmitZExtFromI1(SrcVT.getSimpleVT(), InputReg); if (!InputReg) return false; } // If the result is i1, truncate to the target's type for i1 first. 
if (DstVT == MVT::i1) - DstVT = TLI.getTypeToTransformTo(DstVT); + DstVT = TLI.getTypeToTransformTo(I->getContext(), DstVT); unsigned ResultReg = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), @@ -555,8 +557,8 @@ bool FastISel::SelectBitCast(User *I) { } // Bitcasts of other values become reg-reg copies or BIT_CONVERT operators. - MVT SrcVT = TLI.getValueType(I->getOperand(0)->getType()); - MVT DstVT = TLI.getValueType(I->getType()); + EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType()); + EVT DstVT = TLI.getValueType(I->getType()); if (SrcVT == MVT::Other || !SrcVT.isSimple() || DstVT == MVT::Other || !DstVT.isSimple() || @@ -616,6 +618,49 @@ FastISel::FastEmitBranch(MachineBasicBlock *MSucc) { MBB->addSuccessor(MSucc); } +/// SelectFNeg - Emit an FNeg operation. +/// +bool +FastISel::SelectFNeg(User *I) { + unsigned OpReg = getRegForValue(BinaryOperator::getFNegArgument(I)); + if (OpReg == 0) return false; + + // If the target has ISD::FNEG, use it. + EVT VT = TLI.getValueType(I->getType()); + unsigned ResultReg = FastEmit_r(VT.getSimpleVT(), VT.getSimpleVT(), + ISD::FNEG, OpReg); + if (ResultReg != 0) { + UpdateValueMap(I, ResultReg); + return true; + } + + // Bitcast the value to integer, twiddle the sign bit with xor, + // and then bitcast it back to floating-point. 
+ if (VT.getSizeInBits() > 64) return false; + EVT IntVT = EVT::getIntegerVT(I->getContext(), VT.getSizeInBits()); + if (!TLI.isTypeLegal(IntVT)) + return false; + + unsigned IntReg = FastEmit_r(VT.getSimpleVT(), IntVT.getSimpleVT(), + ISD::BIT_CONVERT, OpReg); + if (IntReg == 0) + return false; + + unsigned IntResultReg = FastEmit_ri_(IntVT.getSimpleVT(), ISD::XOR, IntReg, + UINT64_C(1) << (VT.getSizeInBits()-1), + IntVT.getSimpleVT()); + if (IntResultReg == 0) + return false; + + ResultReg = FastEmit_r(IntVT.getSimpleVT(), VT.getSimpleVT(), + ISD::BIT_CONVERT, IntResultReg); + if (ResultReg == 0) + return false; + + UpdateValueMap(I, ResultReg); + return true; +} + bool FastISel::SelectOperator(User *I, unsigned Opcode) { switch (Opcode) { @@ -626,6 +671,9 @@ FastISel::SelectOperator(User *I, unsigned Opcode) { case Instruction::Sub: return SelectBinaryOp(I, ISD::SUB); case Instruction::FSub: + // FNeg is currently represented in LLVM IR as a special case of FSub. + if (BinaryOperator::isFNeg(I)) + return SelectFNeg(I); return SelectBinaryOp(I, ISD::FSUB); case Instruction::Mul: return SelectBinaryOp(I, ISD::MUL); @@ -709,8 +757,8 @@ FastISel::SelectOperator(User *I, unsigned Opcode) { case Instruction::IntToPtr: // Deliberate fall-through. 
case Instruction::PtrToInt: { - MVT SrcVT = TLI.getValueType(I->getOperand(0)->getType()); - MVT DstVT = TLI.getValueType(I->getType()); + EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType()); + EVT DstVT = TLI.getValueType(I->getType()); if (DstVT.bitsGT(SrcVT)) return SelectCast(I, ISD::ZERO_EXTEND); if (DstVT.bitsLT(SrcVT)) @@ -758,45 +806,44 @@ FastISel::FastISel(MachineFunction &mf, FastISel::~FastISel() {} -unsigned FastISel::FastEmit_(MVT::SimpleValueType, MVT::SimpleValueType, +unsigned FastISel::FastEmit_(MVT, MVT, ISD::NodeType) { return 0; } -unsigned FastISel::FastEmit_r(MVT::SimpleValueType, MVT::SimpleValueType, +unsigned FastISel::FastEmit_r(MVT, MVT, ISD::NodeType, unsigned /*Op0*/) { return 0; } -unsigned FastISel::FastEmit_rr(MVT::SimpleValueType, MVT::SimpleValueType, +unsigned FastISel::FastEmit_rr(MVT, MVT, ISD::NodeType, unsigned /*Op0*/, unsigned /*Op0*/) { return 0; } -unsigned FastISel::FastEmit_i(MVT::SimpleValueType, MVT::SimpleValueType, - ISD::NodeType, uint64_t /*Imm*/) { +unsigned FastISel::FastEmit_i(MVT, MVT, ISD::NodeType, uint64_t /*Imm*/) { return 0; } -unsigned FastISel::FastEmit_f(MVT::SimpleValueType, MVT::SimpleValueType, +unsigned FastISel::FastEmit_f(MVT, MVT, ISD::NodeType, ConstantFP * /*FPImm*/) { return 0; } -unsigned FastISel::FastEmit_ri(MVT::SimpleValueType, MVT::SimpleValueType, +unsigned FastISel::FastEmit_ri(MVT, MVT, ISD::NodeType, unsigned /*Op0*/, uint64_t /*Imm*/) { return 0; } -unsigned FastISel::FastEmit_rf(MVT::SimpleValueType, MVT::SimpleValueType, +unsigned FastISel::FastEmit_rf(MVT, MVT, ISD::NodeType, unsigned /*Op0*/, ConstantFP * /*FPImm*/) { return 0; } -unsigned FastISel::FastEmit_rri(MVT::SimpleValueType, MVT::SimpleValueType, +unsigned FastISel::FastEmit_rri(MVT, MVT, ISD::NodeType, unsigned /*Op0*/, unsigned /*Op1*/, uint64_t /*Imm*/) { @@ -807,9 +854,9 @@ unsigned FastISel::FastEmit_rri(MVT::SimpleValueType, MVT::SimpleValueType, /// to emit an instruction with an immediate operand using 
FastEmit_ri. /// If that fails, it materializes the immediate into a register and try /// FastEmit_rr instead. -unsigned FastISel::FastEmit_ri_(MVT::SimpleValueType VT, ISD::NodeType Opcode, +unsigned FastISel::FastEmit_ri_(MVT VT, ISD::NodeType Opcode, unsigned Op0, uint64_t Imm, - MVT::SimpleValueType ImmType) { + MVT ImmType) { // First check if immediate type is legal. If not, we can't use the ri form. unsigned ResultReg = FastEmit_ri(VT, VT, Opcode, Op0, Imm); if (ResultReg != 0) @@ -824,9 +871,9 @@ unsigned FastISel::FastEmit_ri_(MVT::SimpleValueType VT, ISD::NodeType Opcode, /// to emit an instruction with a floating-point immediate operand using /// FastEmit_rf. If that fails, it materializes the immediate into a register /// and try FastEmit_rr instead. -unsigned FastISel::FastEmit_rf_(MVT::SimpleValueType VT, ISD::NodeType Opcode, +unsigned FastISel::FastEmit_rf_(MVT VT, ISD::NodeType Opcode, unsigned Op0, ConstantFP *FPImm, - MVT::SimpleValueType ImmType) { + MVT ImmType) { // First check if immediate type is legal. If not, we can't use the rf form. unsigned ResultReg = FastEmit_rf(VT, VT, Opcode, Op0, FPImm); if (ResultReg != 0) @@ -842,7 +889,7 @@ unsigned FastISel::FastEmit_rf_(MVT::SimpleValueType VT, ISD::NodeType Opcode, // be replaced by code that creates a load from a constant-pool entry, // which will require some target-specific work. 
const APFloat &Flt = FPImm->getValueAPF(); - MVT IntVT = TLI.getPointerTy(); + EVT IntVT = TLI.getPointerTy(); uint64_t x[2]; uint32_t IntBitWidth = IntVT.getSizeInBits(); @@ -987,7 +1034,7 @@ unsigned FastISel::FastEmitInst_i(unsigned MachineInstOpcode, return ResultReg; } -unsigned FastISel::FastEmitInst_extractsubreg(MVT::SimpleValueType RetVT, +unsigned FastISel::FastEmitInst_extractsubreg(MVT RetVT, unsigned Op0, uint32_t Idx) { const TargetRegisterClass* RC = MRI.getRegClass(Op0); @@ -1008,6 +1055,6 @@ unsigned FastISel::FastEmitInst_extractsubreg(MVT::SimpleValueType RetVT, /// FastEmitZExtFromI1 - Emit MachineInstrs to compute the value of Op /// with all but the least significant bit set to zero. -unsigned FastISel::FastEmitZExtFromI1(MVT::SimpleValueType VT, unsigned Op) { +unsigned FastISel::FastEmitZExtFromI1(MVT VT, unsigned Op) { return FastEmit_ri(VT, VT, ISD::AND, Op, 1); } diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp new file mode 100644 index 0000000..d3ffb2a --- /dev/null +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -0,0 +1,693 @@ +//==--- InstrEmitter.cpp - Emit MachineInstrs for the SelectionDAG class ---==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements the Emit routines for the SelectionDAG class, which creates +// MachineInstrs based on the decisions of the SelectionDAG instruction +// selection. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "instr-emitter" +#include "InstrEmitter.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +using namespace llvm; + +/// CountResults - The results of target nodes have register or immediate +/// operands first, then an optional chain, and optional flag operands (which do +/// not go into the resulting MachineInstr). +unsigned InstrEmitter::CountResults(SDNode *Node) { + unsigned N = Node->getNumValues(); + while (N && Node->getValueType(N - 1) == MVT::Flag) + --N; + if (N && Node->getValueType(N - 1) == MVT::Other) + --N; // Skip over chain result. + return N; +} + +/// CountOperands - The inputs to target nodes have any actual inputs first, +/// followed by an optional chain operand, then an optional flag operand. +/// Compute the number of actual operands that will go into the resulting +/// MachineInstr. +unsigned InstrEmitter::CountOperands(SDNode *Node) { + unsigned N = Node->getNumOperands(); + while (N && Node->getOperand(N - 1).getValueType() == MVT::Flag) + --N; + if (N && Node->getOperand(N - 1).getValueType() == MVT::Other) + --N; // Ignore chain if it exists. + return N; +} + +/// EmitCopyFromReg - Generate machine code for an CopyFromReg node or an +/// implicit physical register output. 
+void InstrEmitter:: +EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, + unsigned SrcReg, DenseMap<SDValue, unsigned> &VRBaseMap) { + unsigned VRBase = 0; + if (TargetRegisterInfo::isVirtualRegister(SrcReg)) { + // Just use the input register directly! + SDValue Op(Node, ResNo); + if (IsClone) + VRBaseMap.erase(Op); + bool isNew = VRBaseMap.insert(std::make_pair(Op, SrcReg)).second; + isNew = isNew; // Silence compiler warning. + assert(isNew && "Node emitted out of order - early"); + return; + } + + // If the node is only used by a CopyToReg and the dest reg is a vreg, use + // the CopyToReg'd destination register instead of creating a new vreg. + bool MatchReg = true; + const TargetRegisterClass *UseRC = NULL; + if (!IsClone && !IsCloned) + for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end(); + UI != E; ++UI) { + SDNode *User = *UI; + bool Match = true; + if (User->getOpcode() == ISD::CopyToReg && + User->getOperand(2).getNode() == Node && + User->getOperand(2).getResNo() == ResNo) { + unsigned DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); + if (TargetRegisterInfo::isVirtualRegister(DestReg)) { + VRBase = DestReg; + Match = false; + } else if (DestReg != SrcReg) + Match = false; + } else { + for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) { + SDValue Op = User->getOperand(i); + if (Op.getNode() != Node || Op.getResNo() != ResNo) + continue; + EVT VT = Node->getValueType(Op.getResNo()); + if (VT == MVT::Other || VT == MVT::Flag) + continue; + Match = false; + if (User->isMachineOpcode()) { + const TargetInstrDesc &II = TII->get(User->getMachineOpcode()); + const TargetRegisterClass *RC = 0; + if (i+II.getNumDefs() < II.getNumOperands()) + RC = II.OpInfo[i+II.getNumDefs()].getRegClass(TRI); + if (!UseRC) + UseRC = RC; + else if (RC) { + const TargetRegisterClass *ComRC = getCommonSubClass(UseRC, RC); + // If multiple uses expect disjoint register classes, we emit + // copies in 
AddRegisterOperand. + if (ComRC) + UseRC = ComRC; + } + } + } + } + MatchReg &= Match; + if (VRBase) + break; + } + + EVT VT = Node->getValueType(ResNo); + const TargetRegisterClass *SrcRC = 0, *DstRC = 0; + SrcRC = TRI->getPhysicalRegisterRegClass(SrcReg, VT); + + // Figure out the register class to create for the destreg. + if (VRBase) { + DstRC = MRI->getRegClass(VRBase); + } else if (UseRC) { + assert(UseRC->hasType(VT) && "Incompatible phys register def and uses!"); + DstRC = UseRC; + } else { + DstRC = TLI->getRegClassFor(VT); + } + + // If all uses are reading from the src physical register and copying the + // register is either impossible or very expensive, then don't create a copy. + if (MatchReg && SrcRC->getCopyCost() < 0) { + VRBase = SrcReg; + } else { + // Create the reg, emit the copy. + VRBase = MRI->createVirtualRegister(DstRC); + bool Emitted = TII->copyRegToReg(*MBB, InsertPos, VRBase, SrcReg, + DstRC, SrcRC); + + assert(Emitted && "Unable to issue a copy instruction!\n"); + (void) Emitted; + } + + SDValue Op(Node, ResNo); + if (IsClone) + VRBaseMap.erase(Op); + bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second; + isNew = isNew; // Silence compiler warning. + assert(isNew && "Node emitted out of order - early"); +} + +/// getDstOfCopyToRegUse - If the only use of the specified result number of +/// node is a CopyToReg, return its destination register. Return 0 otherwise. 
+unsigned InstrEmitter::getDstOfOnlyCopyToRegUse(SDNode *Node, + unsigned ResNo) const { + if (!Node->hasOneUse()) + return 0; + + SDNode *User = *Node->use_begin(); + if (User->getOpcode() == ISD::CopyToReg && + User->getOperand(2).getNode() == Node && + User->getOperand(2).getResNo() == ResNo) { + unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg)) + return Reg; + } + return 0; +} + +void InstrEmitter::CreateVirtualRegisters(SDNode *Node, MachineInstr *MI, + const TargetInstrDesc &II, + bool IsClone, bool IsCloned, + DenseMap<SDValue, unsigned> &VRBaseMap) { + assert(Node->getMachineOpcode() != TargetInstrInfo::IMPLICIT_DEF && + "IMPLICIT_DEF should have been handled as a special case elsewhere!"); + + for (unsigned i = 0; i < II.getNumDefs(); ++i) { + // If the specific node value is only used by a CopyToReg and the dest reg + // is a vreg in the same register class, use the CopyToReg'd destination + // register instead of creating a new vreg. + unsigned VRBase = 0; + const TargetRegisterClass *RC = II.OpInfo[i].getRegClass(TRI); + if (II.OpInfo[i].isOptionalDef()) { + // Optional def must be a physical register. 
+ unsigned NumResults = CountResults(Node); + VRBase = cast<RegisterSDNode>(Node->getOperand(i-NumResults))->getReg(); + assert(TargetRegisterInfo::isPhysicalRegister(VRBase)); + MI->addOperand(MachineOperand::CreateReg(VRBase, true)); + } + + if (!VRBase && !IsClone && !IsCloned) + for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end(); + UI != E; ++UI) { + SDNode *User = *UI; + if (User->getOpcode() == ISD::CopyToReg && + User->getOperand(2).getNode() == Node && + User->getOperand(2).getResNo() == i) { + unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + const TargetRegisterClass *RegRC = MRI->getRegClass(Reg); + if (RegRC == RC) { + VRBase = Reg; + MI->addOperand(MachineOperand::CreateReg(Reg, true)); + break; + } + } + } + } + + // Create the result registers for this node and add the result regs to + // the machine instruction. + if (VRBase == 0) { + assert(RC && "Isn't a register operand!"); + VRBase = MRI->createVirtualRegister(RC); + MI->addOperand(MachineOperand::CreateReg(VRBase, true)); + } + + SDValue Op(Node, i); + if (IsClone) + VRBaseMap.erase(Op); + bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second; + isNew = isNew; // Silence compiler warning. + assert(isNew && "Node emitted out of order - early"); + } +} + +/// getVR - Return the virtual register corresponding to the specified result +/// of the specified node. +unsigned InstrEmitter::getVR(SDValue Op, + DenseMap<SDValue, unsigned> &VRBaseMap) { + if (Op.isMachineOpcode() && + Op.getMachineOpcode() == TargetInstrInfo::IMPLICIT_DEF) { + // Add an IMPLICIT_DEF instruction before every use. + unsigned VReg = getDstOfOnlyCopyToRegUse(Op.getNode(), Op.getResNo()); + // IMPLICIT_DEF can produce any type of result so its TargetInstrDesc + // does not include operand register class info. 
+ if (!VReg) { + const TargetRegisterClass *RC = TLI->getRegClassFor(Op.getValueType()); + VReg = MRI->createVirtualRegister(RC); + } + BuildMI(MBB, Op.getDebugLoc(), + TII->get(TargetInstrInfo::IMPLICIT_DEF), VReg); + return VReg; + } + + DenseMap<SDValue, unsigned>::iterator I = VRBaseMap.find(Op); + assert(I != VRBaseMap.end() && "Node emitted out of order - late"); + return I->second; +} + + +/// AddRegisterOperand - Add the specified register as an operand to the +/// specified machine instr. Insert register copies if the register is +/// not in the required register class. +void +InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op, + unsigned IIOpNum, + const TargetInstrDesc *II, + DenseMap<SDValue, unsigned> &VRBaseMap) { + assert(Op.getValueType() != MVT::Other && + Op.getValueType() != MVT::Flag && + "Chain and flag operands should occur at end of operand list!"); + // Get/emit the operand. + unsigned VReg = getVR(Op, VRBaseMap); + assert(TargetRegisterInfo::isVirtualRegister(VReg) && "Not a vreg?"); + + const TargetInstrDesc &TID = MI->getDesc(); + bool isOptDef = IIOpNum < TID.getNumOperands() && + TID.OpInfo[IIOpNum].isOptionalDef(); + + // If the instruction requires a register in a different class, create + // a new virtual register and copy the value into it. 
+ if (II) { + const TargetRegisterClass *SrcRC = MRI->getRegClass(VReg); + const TargetRegisterClass *DstRC = 0; + if (IIOpNum < II->getNumOperands()) + DstRC = II->OpInfo[IIOpNum].getRegClass(TRI); + assert((DstRC || (TID.isVariadic() && IIOpNum >= TID.getNumOperands())) && + "Don't have operand info for this instruction!"); + if (DstRC && SrcRC != DstRC && !SrcRC->hasSuperClass(DstRC)) { + unsigned NewVReg = MRI->createVirtualRegister(DstRC); + bool Emitted = TII->copyRegToReg(*MBB, InsertPos, NewVReg, VReg, + DstRC, SrcRC); + assert(Emitted && "Unable to issue a copy instruction!\n"); + (void) Emitted; + VReg = NewVReg; + } + } + + MI->addOperand(MachineOperand::CreateReg(VReg, isOptDef)); +} + +/// AddOperand - Add the specified operand to the specified machine instr. II +/// specifies the instruction information for the node, and IIOpNum is the +/// operand number (in the II) that we are adding. IIOpNum and II are used for +/// assertions only. +void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op, + unsigned IIOpNum, + const TargetInstrDesc *II, + DenseMap<SDValue, unsigned> &VRBaseMap) { + if (Op.isMachineOpcode()) { + AddRegisterOperand(MI, Op, IIOpNum, II, VRBaseMap); + } else if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { + MI->addOperand(MachineOperand::CreateImm(C->getSExtValue())); + } else if (ConstantFPSDNode *F = dyn_cast<ConstantFPSDNode>(Op)) { + const ConstantFP *CFP = F->getConstantFPValue(); + MI->addOperand(MachineOperand::CreateFPImm(CFP)); + } else if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(Op)) { + MI->addOperand(MachineOperand::CreateReg(R->getReg(), false)); + } else if (GlobalAddressSDNode *TGA = dyn_cast<GlobalAddressSDNode>(Op)) { + MI->addOperand(MachineOperand::CreateGA(TGA->getGlobal(), TGA->getOffset(), + TGA->getTargetFlags())); + } else if (BasicBlockSDNode *BBNode = dyn_cast<BasicBlockSDNode>(Op)) { + MI->addOperand(MachineOperand::CreateMBB(BBNode->getBasicBlock())); + } else if (FrameIndexSDNode *FI = 
dyn_cast<FrameIndexSDNode>(Op)) { + MI->addOperand(MachineOperand::CreateFI(FI->getIndex())); + } else if (JumpTableSDNode *JT = dyn_cast<JumpTableSDNode>(Op)) { + MI->addOperand(MachineOperand::CreateJTI(JT->getIndex(), + JT->getTargetFlags())); + } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op)) { + int Offset = CP->getOffset(); + unsigned Align = CP->getAlignment(); + const Type *Type = CP->getType(); + // MachineConstantPool wants an explicit alignment. + if (Align == 0) { + Align = TM->getTargetData()->getPrefTypeAlignment(Type); + if (Align == 0) { + // Alignment of vector types. FIXME! + Align = TM->getTargetData()->getTypeAllocSize(Type); + } + } + + unsigned Idx; + MachineConstantPool *MCP = MF->getConstantPool(); + if (CP->isMachineConstantPoolEntry()) + Idx = MCP->getConstantPoolIndex(CP->getMachineCPVal(), Align); + else + Idx = MCP->getConstantPoolIndex(CP->getConstVal(), Align); + MI->addOperand(MachineOperand::CreateCPI(Idx, Offset, + CP->getTargetFlags())); + } else if (ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op)) { + MI->addOperand(MachineOperand::CreateES(ES->getSymbol(), + ES->getTargetFlags())); + } else { + assert(Op.getValueType() != MVT::Other && + Op.getValueType() != MVT::Flag && + "Chain and flag operands should occur at end of operand list!"); + AddRegisterOperand(MI, Op, IIOpNum, II, VRBaseMap); + } +} + +/// getSuperRegisterRegClass - Returns the register class of a superreg A whose +/// "SubIdx"'th sub-register class is the specified register class and whose +/// type matches the specified type. 
+static const TargetRegisterClass* +getSuperRegisterRegClass(const TargetRegisterClass *TRC, + unsigned SubIdx, EVT VT) { + // Pick the register class of the superegister for this type + for (TargetRegisterInfo::regclass_iterator I = TRC->superregclasses_begin(), + E = TRC->superregclasses_end(); I != E; ++I) + if ((*I)->hasType(VT) && (*I)->getSubRegisterRegClass(SubIdx) == TRC) + return *I; + assert(false && "Couldn't find the register class"); + return 0; +} + +/// EmitSubregNode - Generate machine code for subreg nodes. +/// +void InstrEmitter::EmitSubregNode(SDNode *Node, + DenseMap<SDValue, unsigned> &VRBaseMap){ + unsigned VRBase = 0; + unsigned Opc = Node->getMachineOpcode(); + + // If the node is only used by a CopyToReg and the dest reg is a vreg, use + // the CopyToReg'd destination register instead of creating a new vreg. + for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end(); + UI != E; ++UI) { + SDNode *User = *UI; + if (User->getOpcode() == ISD::CopyToReg && + User->getOperand(2).getNode() == Node) { + unsigned DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); + if (TargetRegisterInfo::isVirtualRegister(DestReg)) { + VRBase = DestReg; + break; + } + } + } + + if (Opc == TargetInstrInfo::EXTRACT_SUBREG) { + unsigned SubIdx = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); + + // Create the extract_subreg machine instruction. + MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), + TII->get(TargetInstrInfo::EXTRACT_SUBREG)); + + // Figure out the register class to create for the destreg. + unsigned VReg = getVR(Node->getOperand(0), VRBaseMap); + const TargetRegisterClass *TRC = MRI->getRegClass(VReg); + const TargetRegisterClass *SRC = TRC->getSubRegisterRegClass(SubIdx); + assert(SRC && "Invalid subregister index in EXTRACT_SUBREG"); + + // Figure out the register class to create for the destreg. 
+ // Note that if we're going to directly use an existing register, + // it must be precisely the required class, and not a subclass + // thereof. + if (VRBase == 0 || SRC != MRI->getRegClass(VRBase)) { + // Create the reg + assert(SRC && "Couldn't find source register class"); + VRBase = MRI->createVirtualRegister(SRC); + } + + // Add def, source, and subreg index + MI->addOperand(MachineOperand::CreateReg(VRBase, true)); + AddOperand(MI, Node->getOperand(0), 0, 0, VRBaseMap); + MI->addOperand(MachineOperand::CreateImm(SubIdx)); + MBB->insert(InsertPos, MI); + } else if (Opc == TargetInstrInfo::INSERT_SUBREG || + Opc == TargetInstrInfo::SUBREG_TO_REG) { + SDValue N0 = Node->getOperand(0); + SDValue N1 = Node->getOperand(1); + SDValue N2 = Node->getOperand(2); + unsigned SubReg = getVR(N1, VRBaseMap); + unsigned SubIdx = cast<ConstantSDNode>(N2)->getZExtValue(); + const TargetRegisterClass *TRC = MRI->getRegClass(SubReg); + const TargetRegisterClass *SRC = + getSuperRegisterRegClass(TRC, SubIdx, + Node->getValueType(0)); + + // Figure out the register class to create for the destreg. + // Note that if we're going to directly use an existing register, + // it must be precisely the required class, and not a subclass + // thereof. + if (VRBase == 0 || SRC != MRI->getRegClass(VRBase)) { + // Create the reg + assert(SRC && "Couldn't find source register class"); + VRBase = MRI->createVirtualRegister(SRC); + } + + // Create the insert_subreg or subreg_to_reg machine instruction. 
+ MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), TII->get(Opc)); + MI->addOperand(MachineOperand::CreateReg(VRBase, true)); + + // If creating a subreg_to_reg, then the first input operand + // is an implicit value immediate, otherwise it's a register + if (Opc == TargetInstrInfo::SUBREG_TO_REG) { + const ConstantSDNode *SD = cast<ConstantSDNode>(N0); + MI->addOperand(MachineOperand::CreateImm(SD->getZExtValue())); + } else + AddOperand(MI, N0, 0, 0, VRBaseMap); + // Add the subregster being inserted + AddOperand(MI, N1, 0, 0, VRBaseMap); + MI->addOperand(MachineOperand::CreateImm(SubIdx)); + MBB->insert(InsertPos, MI); + } else + llvm_unreachable("Node is not insert_subreg, extract_subreg, or subreg_to_reg"); + + SDValue Op(Node, 0); + bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second; + isNew = isNew; // Silence compiler warning. + assert(isNew && "Node emitted out of order - early"); +} + +/// EmitCopyToRegClassNode - Generate machine code for COPY_TO_REGCLASS nodes. +/// COPY_TO_REGCLASS is just a normal copy, except that the destination +/// register is constrained to be in a particular register class. +/// +void +InstrEmitter::EmitCopyToRegClassNode(SDNode *Node, + DenseMap<SDValue, unsigned> &VRBaseMap) { + unsigned VReg = getVR(Node->getOperand(0), VRBaseMap); + const TargetRegisterClass *SrcRC = MRI->getRegClass(VReg); + + unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); + const TargetRegisterClass *DstRC = TRI->getRegClass(DstRCIdx); + + // Create the new VReg in the destination class and emit a copy. + unsigned NewVReg = MRI->createVirtualRegister(DstRC); + bool Emitted = TII->copyRegToReg(*MBB, InsertPos, NewVReg, VReg, + DstRC, SrcRC); + assert(Emitted && + "Unable to issue a copy instruction for a COPY_TO_REGCLASS node!\n"); + (void) Emitted; + + SDValue Op(Node, 0); + bool isNew = VRBaseMap.insert(std::make_pair(Op, NewVReg)).second; + isNew = isNew; // Silence compiler warning. 
+ assert(isNew && "Node emitted out of order - early"); +} + +/// EmitNode - Generate machine code for an node and needed dependencies. +/// +void InstrEmitter::EmitNode(SDNode *Node, bool IsClone, bool IsCloned, + DenseMap<SDValue, unsigned> &VRBaseMap, + DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) { + // If machine instruction + if (Node->isMachineOpcode()) { + unsigned Opc = Node->getMachineOpcode(); + + // Handle subreg insert/extract specially + if (Opc == TargetInstrInfo::EXTRACT_SUBREG || + Opc == TargetInstrInfo::INSERT_SUBREG || + Opc == TargetInstrInfo::SUBREG_TO_REG) { + EmitSubregNode(Node, VRBaseMap); + return; + } + + // Handle COPY_TO_REGCLASS specially. + if (Opc == TargetInstrInfo::COPY_TO_REGCLASS) { + EmitCopyToRegClassNode(Node, VRBaseMap); + return; + } + + if (Opc == TargetInstrInfo::IMPLICIT_DEF) + // We want a unique VR for each IMPLICIT_DEF use. + return; + + const TargetInstrDesc &II = TII->get(Opc); + unsigned NumResults = CountResults(Node); + unsigned NodeOperands = CountOperands(Node); + bool HasPhysRegOuts = (NumResults > II.getNumDefs()) && + II.getImplicitDefs() != 0; +#ifndef NDEBUG + unsigned NumMIOperands = NodeOperands + NumResults; + assert((II.getNumOperands() == NumMIOperands || + HasPhysRegOuts || II.isVariadic()) && + "#operands for dag node doesn't match .td file!"); +#endif + + // Create the new machine instruction. + MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), II); + + // Add result register values for things that are defined by this + // instruction. + if (NumResults) + CreateVirtualRegisters(Node, MI, II, IsClone, IsCloned, VRBaseMap); + + // Emit all of the actual operands of this instruction, adding them to the + // instruction as appropriate. + bool HasOptPRefs = II.getNumDefs() > NumResults; + assert((!HasOptPRefs || !HasPhysRegOuts) && + "Unable to cope with optional defs and phys regs defs!"); + unsigned NumSkip = HasOptPRefs ? 
II.getNumDefs() - NumResults : 0; + for (unsigned i = NumSkip; i != NodeOperands; ++i) + AddOperand(MI, Node->getOperand(i), i-NumSkip+II.getNumDefs(), &II, + VRBaseMap); + + // Transfer all of the memory reference descriptions of this instruction. + MI->setMemRefs(cast<MachineSDNode>(Node)->memoperands_begin(), + cast<MachineSDNode>(Node)->memoperands_end()); + + if (II.usesCustomDAGSchedInsertionHook()) { + // Insert this instruction into the basic block using a target + // specific inserter which may returns a new basic block. + MBB = TLI->EmitInstrWithCustomInserter(MI, MBB, EM); + InsertPos = MBB->end(); + } else { + MBB->insert(InsertPos, MI); + } + + // Additional results must be an physical register def. + if (HasPhysRegOuts) { + for (unsigned i = II.getNumDefs(); i < NumResults; ++i) { + unsigned Reg = II.getImplicitDefs()[i - II.getNumDefs()]; + if (Node->hasAnyUseOfValue(i)) + EmitCopyFromReg(Node, i, IsClone, IsCloned, Reg, VRBaseMap); + } + } + return; + } + + switch (Node->getOpcode()) { + default: +#ifndef NDEBUG + Node->dump(); +#endif + llvm_unreachable("This target-independent node should have been selected!"); + break; + case ISD::EntryToken: + llvm_unreachable("EntryToken should have been excluded from the schedule!"); + break; + case ISD::MERGE_VALUES: + case ISD::TokenFactor: // fall thru + break; + case ISD::CopyToReg: { + unsigned SrcReg; + SDValue SrcVal = Node->getOperand(2); + if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(SrcVal)) + SrcReg = R->getReg(); + else + SrcReg = getVR(SrcVal, VRBaseMap); + + unsigned DestReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg(); + if (SrcReg == DestReg) // Coalesced away the copy? Ignore. + break; + + const TargetRegisterClass *SrcTRC = 0, *DstTRC = 0; + // Get the register classes of the src/dst. 
+ if (TargetRegisterInfo::isVirtualRegister(SrcReg)) + SrcTRC = MRI->getRegClass(SrcReg); + else + SrcTRC = TRI->getPhysicalRegisterRegClass(SrcReg,SrcVal.getValueType()); + + if (TargetRegisterInfo::isVirtualRegister(DestReg)) + DstTRC = MRI->getRegClass(DestReg); + else + DstTRC = TRI->getPhysicalRegisterRegClass(DestReg, + Node->getOperand(1).getValueType()); + + bool Emitted = TII->copyRegToReg(*MBB, InsertPos, DestReg, SrcReg, + DstTRC, SrcTRC); + assert(Emitted && "Unable to issue a copy instruction!\n"); + (void) Emitted; + break; + } + case ISD::CopyFromReg: { + unsigned SrcReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg(); + EmitCopyFromReg(Node, 0, IsClone, IsCloned, SrcReg, VRBaseMap); + break; + } + case ISD::INLINEASM: { + unsigned NumOps = Node->getNumOperands(); + if (Node->getOperand(NumOps-1).getValueType() == MVT::Flag) + --NumOps; // Ignore the flag operand. + + // Create the inline asm machine instruction. + MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), + TII->get(TargetInstrInfo::INLINEASM)); + + // Add the asm string as an external symbol operand. + const char *AsmStr = + cast<ExternalSymbolSDNode>(Node->getOperand(1))->getSymbol(); + MI->addOperand(MachineOperand::CreateES(AsmStr)); + + // Add all of the operand registers to the instruction. + for (unsigned i = 2; i != NumOps;) { + unsigned Flags = + cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue(); + unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags); + + MI->addOperand(MachineOperand::CreateImm(Flags)); + ++i; // Skip the ID value. + + switch (Flags & 7) { + default: llvm_unreachable("Bad flags!"); + case 2: // Def of register. + for (; NumVals; --NumVals, ++i) { + unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); + MI->addOperand(MachineOperand::CreateReg(Reg, true)); + } + break; + case 6: // Def of earlyclobber register. 
+ for (; NumVals; --NumVals, ++i) { + unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); + MI->addOperand(MachineOperand::CreateReg(Reg, true, false, false, + false, false, true)); + } + break; + case 1: // Use of register. + case 3: // Immediate. + case 4: // Addressing mode. + // The addressing mode has been selected, just add all of the + // operands to the machine instruction. + for (; NumVals; --NumVals, ++i) + AddOperand(MI, Node->getOperand(i), 0, 0, VRBaseMap); + break; + } + } + MBB->insert(InsertPos, MI); + break; + } + } +} + +/// InstrEmitter - Construct an InstrEmitter and set it to start inserting +/// at the given position in the given block. +InstrEmitter::InstrEmitter(MachineBasicBlock *mbb, + MachineBasicBlock::iterator insertpos) + : MF(mbb->getParent()), + MRI(&MF->getRegInfo()), + TM(&MF->getTarget()), + TII(TM->getInstrInfo()), + TRI(TM->getRegisterInfo()), + TLI(TM->getTargetLowering()), + MBB(mbb), InsertPos(insertpos) { +} diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.h b/lib/CodeGen/SelectionDAG/InstrEmitter.h new file mode 100644 index 0000000..bb4634d --- /dev/null +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.h @@ -0,0 +1,119 @@ +//===---- InstrEmitter.h - Emit MachineInstrs for the SelectionDAG class ---==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This declares the Emit routines for the SelectionDAG class, which creates +// MachineInstrs based on the decisions of the SelectionDAG instruction +// selection. 
+// +//===----------------------------------------------------------------------===// + +#ifndef INSTREMITTER_H +#define INSTREMITTER_H + +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/ADT/DenseMap.h" + +namespace llvm { + +class TargetInstrDesc; + +class InstrEmitter { + MachineFunction *MF; + MachineRegisterInfo *MRI; + const TargetMachine *TM; + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + const TargetLowering *TLI; + + MachineBasicBlock *MBB; + MachineBasicBlock::iterator InsertPos; + + /// EmitCopyFromReg - Generate machine code for an CopyFromReg node or an + /// implicit physical register output. + void EmitCopyFromReg(SDNode *Node, unsigned ResNo, + bool IsClone, bool IsCloned, + unsigned SrcReg, + DenseMap<SDValue, unsigned> &VRBaseMap); + + /// getDstOfCopyToRegUse - If the only use of the specified result number of + /// node is a CopyToReg, return its destination register. Return 0 otherwise. + unsigned getDstOfOnlyCopyToRegUse(SDNode *Node, + unsigned ResNo) const; + + void CreateVirtualRegisters(SDNode *Node, MachineInstr *MI, + const TargetInstrDesc &II, + bool IsClone, bool IsCloned, + DenseMap<SDValue, unsigned> &VRBaseMap); + + /// getVR - Return the virtual register corresponding to the specified result + /// of the specified node. + unsigned getVR(SDValue Op, + DenseMap<SDValue, unsigned> &VRBaseMap); + + /// AddRegisterOperand - Add the specified register as an operand to the + /// specified machine instr. Insert register copies if the register is + /// not in the required register class. + void AddRegisterOperand(MachineInstr *MI, SDValue Op, + unsigned IIOpNum, + const TargetInstrDesc *II, + DenseMap<SDValue, unsigned> &VRBaseMap); + + /// AddOperand - Add the specified operand to the specified machine instr. II + /// specifies the instruction information for the node, and IIOpNum is the + /// operand number (in the II) that we are adding. 
IIOpNum and II are used for + /// assertions only. + void AddOperand(MachineInstr *MI, SDValue Op, + unsigned IIOpNum, + const TargetInstrDesc *II, + DenseMap<SDValue, unsigned> &VRBaseMap); + + /// EmitSubregNode - Generate machine code for subreg nodes. + /// + void EmitSubregNode(SDNode *Node, DenseMap<SDValue, unsigned> &VRBaseMap); + + /// EmitCopyToRegClassNode - Generate machine code for COPY_TO_REGCLASS nodes. + /// COPY_TO_REGCLASS is just a normal copy, except that the destination + /// register is constrained to be in a particular register class. + /// + void EmitCopyToRegClassNode(SDNode *Node, + DenseMap<SDValue, unsigned> &VRBaseMap); + +public: + /// CountResults - The results of target nodes have register or immediate + /// operands first, then an optional chain, and optional flag operands + /// (which do not go into the machine instrs.) + static unsigned CountResults(SDNode *Node); + + /// CountOperands - The inputs to target nodes have any actual inputs first, + /// followed by an optional chain operand, then flag operands. Compute + /// the number of actual operands that will go into the resulting + /// MachineInstr. + static unsigned CountOperands(SDNode *Node); + + /// EmitNode - Generate machine code for an node and needed dependencies. + /// + void EmitNode(SDNode *Node, bool IsClone, bool IsCloned, + DenseMap<SDValue, unsigned> &VRBaseMap, + DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM); + + /// getBlock - Return the current basic block. + MachineBasicBlock *getBlock() { return MBB; } + + /// getInsertPos - Return the current insertion position. + MachineBasicBlock::iterator getInsertPos() { return InsertPos; } + + /// InstrEmitter - Construct an InstrEmitter and set it to start inserting + /// at the given position in the given block. 
+ InstrEmitter(MachineBasicBlock *mbb, MachineBasicBlock::iterator insertpos); +}; + +} + +#endif diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 1413d95..fc01b07 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -30,9 +30,12 @@ #include "llvm/DerivedTypes.h" #include "llvm/Function.h" #include "llvm/GlobalVariable.h" +#include "llvm/LLVMContext.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/SmallPtrSet.h" @@ -98,13 +101,14 @@ public: /// getTypeAction - Return how we should legalize values of this type, either /// it is already legal or we need to expand it into multiple registers of /// smaller integer type, or we need to promote it to a larger type. - LegalizeAction getTypeAction(MVT VT) const { - return (LegalizeAction)ValueTypeActions.getTypeAction(VT); + LegalizeAction getTypeAction(EVT VT) const { + return + (LegalizeAction)ValueTypeActions.getTypeAction(*DAG.getContext(), VT); } /// isTypeLegal - Return true if this type is legal on this target. /// - bool isTypeLegal(MVT VT) const { + bool isTypeLegal(EVT VT) const { return getTypeAction(VT) == Legal; } @@ -131,14 +135,14 @@ private: /// performs the same shuffe in terms of order or result bytes, but on a type /// whose vector element type is narrower than the original shuffle type. /// e.g. 
<v4i32> <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3> - SDValue ShuffleWithNarrowerEltType(MVT NVT, MVT VT, DebugLoc dl, + SDValue ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl, SDValue N1, SDValue N2, SmallVectorImpl<int> &Mask) const; bool LegalizeAllNodesNotLeadingTo(SDNode *N, SDNode *Dest, SmallPtrSet<SDNode*, 32> &NodesLeadingTo); - void LegalizeSetCCCondCode(MVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, + void LegalizeSetCCCondCode(EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, DebugLoc dl); SDValue ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned); @@ -149,18 +153,18 @@ private: RTLIB::Libcall Call_I32, RTLIB::Libcall Call_I64, RTLIB::Libcall Call_I128); - SDValue EmitStackConvert(SDValue SrcOp, MVT SlotVT, MVT DestVT, DebugLoc dl); + SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT, DebugLoc dl); SDValue ExpandBUILD_VECTOR(SDNode *Node); SDValue ExpandSCALAR_TO_VECTOR(SDNode *Node); SDValue ExpandDBG_STOPPOINT(SDNode *Node); void ExpandDYNAMIC_STACKALLOC(SDNode *Node, SmallVectorImpl<SDValue> &Results); SDValue ExpandFCOPYSIGN(SDNode *Node); - SDValue ExpandLegalINT_TO_FP(bool isSigned, SDValue LegalOp, MVT DestVT, + SDValue ExpandLegalINT_TO_FP(bool isSigned, SDValue LegalOp, EVT DestVT, DebugLoc dl); - SDValue PromoteLegalINT_TO_FP(SDValue LegalOp, MVT DestVT, bool isSigned, + SDValue PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT, bool isSigned, DebugLoc dl); - SDValue PromoteLegalFP_TO_INT(SDValue LegalOp, MVT DestVT, bool isSigned, + SDValue PromoteLegalFP_TO_INT(SDValue LegalOp, EVT DestVT, bool isSigned, DebugLoc dl); SDValue ExpandBSWAP(SDValue Op, DebugLoc dl); @@ -179,10 +183,10 @@ private: /// whose vector element type is narrower than the original shuffle type. /// e.g. 
<v4i32> <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3> SDValue -SelectionDAGLegalize::ShuffleWithNarrowerEltType(MVT NVT, MVT VT, DebugLoc dl, +SelectionDAGLegalize::ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl, SDValue N1, SDValue N2, SmallVectorImpl<int> &Mask) const { - MVT EltVT = NVT.getVectorElementType(); + EVT EltVT = NVT.getVectorElementType(); unsigned NumMaskElts = VT.getVectorNumElements(); unsigned NumDestElts = NVT.getVectorNumElements(); unsigned NumEltsGrowth = NumDestElts / NumMaskElts; @@ -342,7 +346,7 @@ static SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP, // double. This shrinks FP constants and canonicalizes them for targets where // an FP extending load is the same cost as a normal load (such as on the x87 // fp stack or PPC FP unit). - MVT VT = CFP->getValueType(0); + EVT VT = CFP->getValueType(0); ConstantFP *LLVMC = const_cast<ConstantFP*>(CFP->getConstantFPValue()); if (!UseCP) { assert((VT == MVT::f64 || VT == MVT::f32) && "Invalid type expansion"); @@ -350,16 +354,16 @@ static SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP, (VT == MVT::f64) ? MVT::i64 : MVT::i32); } - MVT OrigVT = VT; - MVT SVT = VT; + EVT OrigVT = VT; + EVT SVT = VT; while (SVT != MVT::f32) { - SVT = (MVT::SimpleValueType)(SVT.getSimpleVT() - 1); + SVT = (MVT::SimpleValueType)(SVT.getSimpleVT().SimpleTy - 1); if (CFP->isValueValidForType(SVT, CFP->getValueAPF()) && // Only do this if the target has a native EXTLOAD instruction from // smaller type. 
TLI.isLoadExtLegal(ISD::EXTLOAD, SVT) && TLI.ShouldShrinkFPConstant(OrigVT)) { - const Type *SType = SVT.getTypeForMVT(); + const Type *SType = SVT.getTypeForEVT(*DAG.getContext()); LLVMC = cast<ConstantFP>(ConstantExpr::getFPTrunc(LLVMC, SType)); VT = SVT; Extend = true; @@ -384,13 +388,13 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, SDValue Chain = ST->getChain(); SDValue Ptr = ST->getBasePtr(); SDValue Val = ST->getValue(); - MVT VT = Val.getValueType(); + EVT VT = Val.getValueType(); int Alignment = ST->getAlignment(); int SVOffset = ST->getSrcValueOffset(); DebugLoc dl = ST->getDebugLoc(); if (ST->getMemoryVT().isFloatingPoint() || ST->getMemoryVT().isVector()) { - MVT intVT = MVT::getIntegerVT(VT.getSizeInBits()); + EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits()); if (TLI.isTypeLegal(intVT)) { // Expand to a bitconvert of the value to the integer type of the // same size, then a (misaligned) int store. @@ -401,9 +405,9 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, } else { // Do a (aligned) store to a stack slot, then copy from the stack slot // to the final destination using (unaligned) integer loads and stores. - MVT StoredVT = ST->getMemoryVT(); - MVT RegVT = - TLI.getRegisterType(MVT::getIntegerVT(StoredVT.getSizeInBits())); + EVT StoredVT = ST->getMemoryVT(); + EVT RegVT = + TLI.getRegisterType(*DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), StoredVT.getSizeInBits())); unsigned StoredBytes = StoredVT.getSizeInBits() / 8; unsigned RegBytes = RegVT.getSizeInBits() / 8; unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes; @@ -437,7 +441,7 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, // The last store may be partial. Do a truncating store. On big-endian // machines this requires an extending load from the stack slot to ensure // that the bits are in the right place. 
- MVT MemVT = MVT::getIntegerVT(8 * (StoredBytes - Offset)); + EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), 8 * (StoredBytes - Offset)); // Load from the stack slot. SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Store, StackPtr, @@ -456,8 +460,8 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, !ST->getMemoryVT().isVector() && "Unaligned store of unknown type."); // Get the half-size VT - MVT NewStoredVT = - (MVT::SimpleValueType)(ST->getMemoryVT().getSimpleVT() - 1); + EVT NewStoredVT = + (MVT::SimpleValueType)(ST->getMemoryVT().getSimpleVT().SimpleTy - 1); int NumBits = NewStoredVT.getSizeInBits(); int IncrementSize = NumBits / 8; @@ -488,11 +492,11 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, int SVOffset = LD->getSrcValueOffset(); SDValue Chain = LD->getChain(); SDValue Ptr = LD->getBasePtr(); - MVT VT = LD->getValueType(0); - MVT LoadedVT = LD->getMemoryVT(); + EVT VT = LD->getValueType(0); + EVT LoadedVT = LD->getMemoryVT(); DebugLoc dl = LD->getDebugLoc(); if (VT.isFloatingPoint() || VT.isVector()) { - MVT intVT = MVT::getIntegerVT(LoadedVT.getSizeInBits()); + EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits()); if (TLI.isTypeLegal(intVT)) { // Expand to a (misaligned) integer load of the same size, // then bitconvert to floating point or vector. @@ -508,7 +512,7 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, } else { // Copy the value to a (aligned) stack slot using (unaligned) integer // loads and stores, then do a (aligned) load from the stack slot. - MVT RegVT = TLI.getRegisterType(intVT); + EVT RegVT = TLI.getRegisterType(*DAG.getContext(), intVT); unsigned LoadedBytes = LoadedVT.getSizeInBits() / 8; unsigned RegBytes = RegVT.getSizeInBits() / 8; unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes; @@ -538,7 +542,7 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, } // The last copy may be partial. Do an extending load. 
- MVT MemVT = MVT::getIntegerVT(8 * (LoadedBytes - Offset)); + EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), 8 * (LoadedBytes - Offset)); SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr, LD->getSrcValue(), SVOffset + Offset, MemVT, LD->isVolatile(), @@ -568,8 +572,8 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, // Compute the new VT that is half the size of the old one. This is an // integer MVT. unsigned NumBits = LoadedVT.getSizeInBits(); - MVT NewLoadedVT; - NewLoadedVT = MVT::getIntegerVT(NumBits/2); + EVT NewLoadedVT; + NewLoadedVT = EVT::getIntegerVT(*DAG.getContext(), NumBits/2); NumBits >>= 1; unsigned Alignment = LD->getAlignment(); @@ -629,10 +633,10 @@ PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx, // with a "move to register" or "extload into register" instruction, then // permute it into place, if the idx is a constant and if the idx is // supported by the target. - MVT VT = Tmp1.getValueType(); - MVT EltVT = VT.getVectorElementType(); - MVT IdxVT = Tmp3.getValueType(); - MVT PtrVT = TLI.getPointerTy(); + EVT VT = Tmp1.getValueType(); + EVT EltVT = VT.getVectorElementType(); + EVT IdxVT = Tmp3.getValueType(); + EVT PtrVT = TLI.getPointerTy(); SDValue StackPtr = DAG.CreateStackTemporary(VT); int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); @@ -663,7 +667,7 @@ ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val, SDValue Idx, DebugLoc dl) { // SCALAR_TO_VECTOR requires that the type of the value being inserted // match the element type of the vector being created, except for // integers in which case the inserted value can be over width. 
- MVT EltVT = Vec.getValueType().getVectorElementType(); + EVT EltVT = Vec.getValueType().getVectorElementType(); if (Val.getValueType() == EltVT || (EltVT.isInteger() && Val.getValueType().bitsGE(EltVT))) { SDValue ScVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, @@ -785,7 +789,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { break; case ISD::FP_ROUND_INREG: case ISD::SIGN_EXTEND_INREG: { - MVT InnerType = cast<VTSDNode>(Node->getOperand(1))->getVT(); + EVT InnerType = cast<VTSDNode>(Node->getOperand(1))->getVT(); Action = TLI.getOperationAction(Node->getOpcode(), InnerType); break; } @@ -795,7 +799,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { unsigned CCOperand = Node->getOpcode() == ISD::SELECT_CC ? 4 : Node->getOpcode() == ISD::SETCC ? 2 : 1; unsigned CompareOperand = Node->getOpcode() == ISD::BR_CC ? 2 : 0; - MVT OpVT = Node->getOperand(CompareOperand).getValueType(); + EVT OpVT = Node->getOperand(CompareOperand).getValueType(); ISD::CondCode CCCode = cast<CondCodeSDNode>(Node->getOperand(CCOperand))->get(); Action = TLI.getCondCodeAction(CCCode, OpVT); @@ -821,11 +825,6 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { // special case should be done as part of making LegalizeDAG non-recursive. SimpleFinishLegalizing = false; break; - case ISD::CALL: - // FIXME: Legalization for calls requires custom-lowering the call before - // legalizing the operands! (I haven't looked into precisely why.) - SimpleFinishLegalizing = false; - break; case ISD::EXTRACT_ELEMENT: case ISD::FLT_ROUNDS_: case ISD::SADDO: @@ -847,7 +846,6 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { case ISD::TRAMPOLINE: case ISD::FRAMEADDR: case ISD::RETURNADDR: - case ISD::FORMAL_ARGUMENTS: // These operations lie about being legal: when they claim to be legal, // they should actually be custom-lowered. 
Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); @@ -885,7 +883,6 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { case ISD::BR_JT: case ISD::BR_CC: case ISD::BRCOND: - case ISD::RET: // Branches tweak the chain to include LastCALLSEQ_END Ops[0] = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ops[0], LastCALLSEQ_END); @@ -902,6 +899,14 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { if (!Ops[1].getValueType().isVector()) Ops[1] = LegalizeOp(DAG.getShiftAmountOperand(Ops[1])); break; + case ISD::SRL_PARTS: + case ISD::SRA_PARTS: + case ISD::SHL_PARTS: + // Legalizing shifts/rotates requires adjusting the shift amount + // to the appropriate width. + if (!Ops[2].getValueType().isVector()) + Ops[2] = LegalizeOp(DAG.getShiftAmountOperand(Ops[2])); + break; } Result = DAG.UpdateNodeOperands(Result.getValue(0), Ops.data(), @@ -946,44 +951,15 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { switch (Node->getOpcode()) { default: #ifndef NDEBUG - cerr << "NODE: "; Node->dump(&DAG); cerr << "\n"; + errs() << "NODE: "; + Node->dump(&DAG); + errs() << "\n"; #endif - assert(0 && "Do not know how to legalize this operator!"); - abort(); - case ISD::CALL: - // The only option for this is to custom lower it. - Tmp3 = TLI.LowerOperation(Result.getValue(0), DAG); - assert(Tmp3.getNode() && "Target didn't custom lower this node!"); - // A call within a calling sequence must be legalized to something - // other than the normal CALLSEQ_END. Violating this gets Legalize - // into an infinite loop. - assert ((!IsLegalizingCall || - Node->getOpcode() != ISD::CALL || - Tmp3.getNode()->getOpcode() != ISD::CALLSEQ_END) && - "Nested CALLSEQ_START..CALLSEQ_END not supported."); - - // The number of incoming and outgoing values should match; unless the final - // outgoing value is a flag. 
- assert((Tmp3.getNode()->getNumValues() == Result.getNode()->getNumValues() || - (Tmp3.getNode()->getNumValues() == Result.getNode()->getNumValues() + 1 && - Tmp3.getNode()->getValueType(Tmp3.getNode()->getNumValues() - 1) == - MVT::Flag)) && - "Lowering call/formal_arguments produced unexpected # results!"); - - // Since CALL/FORMAL_ARGUMENTS nodes produce multiple values, make sure to - // remember that we legalized all of them, so it doesn't get relegalized. - for (unsigned i = 0, e = Tmp3.getNode()->getNumValues(); i != e; ++i) { - if (Tmp3.getNode()->getValueType(i) == MVT::Flag) - continue; - Tmp1 = LegalizeOp(Tmp3.getValue(i)); - if (Op.getResNo() == i) - Tmp2 = Tmp1; - AddLegalizedOperand(SDValue(Node, i), Tmp1); - } - return Tmp2; + llvm_unreachable("Do not know how to legalize this operator!"); + case ISD::BUILD_VECTOR: switch (TLI.getOperationAction(ISD::BUILD_VECTOR, Node->getValueType(0))) { - default: assert(0 && "This action is not supported yet!"); + default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Custom: Tmp3 = TLI.LowerOperation(Result, DAG); if (Tmp3.getNode()) { @@ -1094,22 +1070,22 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { ISD::LoadExtType ExtType = LD->getExtensionType(); if (ExtType == ISD::NON_EXTLOAD) { - MVT VT = Node->getValueType(0); + EVT VT = Node->getValueType(0); Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, LD->getOffset()); Tmp3 = Result.getValue(0); Tmp4 = Result.getValue(1); switch (TLI.getOperationAction(Node->getOpcode(), VT)) { - default: assert(0 && "This action is not supported yet!"); + default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Legal: // If this is an unaligned load and the target doesn't support it, // expand it. 
- if (!TLI.allowsUnalignedMemoryAccesses()) { - unsigned ABIAlignment = TLI.getTargetData()-> - getABITypeAlignment(LD->getMemoryVT().getTypeForMVT()); + if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) { + const Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); + unsigned ABIAlignment = TLI.getTargetData()->getABITypeAlignment(Ty); if (LD->getAlignment() < ABIAlignment){ - Result = ExpandUnalignedLoad(cast<LoadSDNode>(Result.getNode()), DAG, - TLI); + Result = ExpandUnalignedLoad(cast<LoadSDNode>(Result.getNode()), + DAG, TLI); Tmp3 = Result.getOperand(0); Tmp4 = Result.getOperand(1); Tmp3 = LegalizeOp(Tmp3); @@ -1128,7 +1104,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { // Only promote a load of vector type to another. assert(VT.isVector() && "Cannot promote this load!"); // Change base type to a different vector type. - MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT); + EVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT); Tmp1 = DAG.getLoad(NVT, dl, Tmp1, Tmp2, LD->getSrcValue(), LD->getSrcValueOffset(), @@ -1144,7 +1120,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { AddLegalizedOperand(SDValue(Node, 1), Tmp4); return Op.getResNo() ? Tmp4 : Tmp3; } else { - MVT SrcVT = LD->getMemoryVT(); + EVT SrcVT = LD->getMemoryVT(); unsigned SrcWidth = SrcVT.getSizeInBits(); int SVOffset = LD->getSrcValueOffset(); unsigned Alignment = LD->getAlignment(); @@ -1163,7 +1139,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { // Promote to a byte-sized load if not loading an integral number of // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24. 
unsigned NewWidth = SrcVT.getStoreSizeInBits(); - MVT NVT = MVT::getIntegerVT(NewWidth); + EVT NVT = EVT::getIntegerVT(*DAG.getContext(), NewWidth); SDValue Ch; // The extra bits are guaranteed to be zero, since we stored them that @@ -1201,8 +1177,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { assert(ExtraWidth < RoundWidth); assert(!(RoundWidth % 8) && !(ExtraWidth % 8) && "Load size not an integral number of bytes!"); - MVT RoundVT = MVT::getIntegerVT(RoundWidth); - MVT ExtraVT = MVT::getIntegerVT(ExtraWidth); + EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth); + EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth); SDValue Lo, Hi, Ch; unsigned IncrementSize; @@ -1269,7 +1245,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Tmp2 = LegalizeOp(Ch); } else { switch (TLI.getLoadExtAction(ExtType, SrcVT)) { - default: assert(0 && "This action is not supported yet!"); + default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Custom: isCustom = true; // FALLTHROUGH @@ -1287,12 +1263,12 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { } else { // If this is an unaligned load and the target doesn't support it, // expand it. 
- if (!TLI.allowsUnalignedMemoryAccesses()) { - unsigned ABIAlignment = TLI.getTargetData()-> - getABITypeAlignment(LD->getMemoryVT().getTypeForMVT()); + if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) { + const Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); + unsigned ABIAlignment = TLI.getTargetData()->getABITypeAlignment(Ty); if (LD->getAlignment() < ABIAlignment){ - Result = ExpandUnalignedLoad(cast<LoadSDNode>(Result.getNode()), DAG, - TLI); + Result = ExpandUnalignedLoad(cast<LoadSDNode>(Result.getNode()), + DAG, TLI); Tmp1 = Result.getOperand(0); Tmp2 = Result.getOperand(1); Tmp1 = LegalizeOp(Tmp1); @@ -1303,10 +1279,13 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { break; case TargetLowering::Expand: // f64 = EXTLOAD f32 should expand to LOAD, FP_EXTEND - if (SrcVT == MVT::f32 && Node->getValueType(0) == MVT::f64) { + // f128 = EXTLOAD {f32,f64} too + if ((SrcVT == MVT::f32 && (Node->getValueType(0) == MVT::f64 || + Node->getValueType(0) == MVT::f128)) || + (SrcVT == MVT::f64 && Node->getValueType(0) == MVT::f128)) { SDValue Load = DAG.getLoad(SrcVT, dl, Tmp1, Tmp2, LD->getSrcValue(), - LD->getSrcValueOffset(), - LD->isVolatile(), LD->getAlignment()); + LD->getSrcValueOffset(), + LD->isVolatile(), LD->getAlignment()); Result = DAG.getNode(ISD::FP_EXTEND, dl, Node->getValueType(0), Load); Tmp1 = LegalizeOp(Result); // Relegalize new nodes. @@ -1359,18 +1338,18 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp3, Tmp2, ST->getOffset()); - MVT VT = Tmp3.getValueType(); + EVT VT = Tmp3.getValueType(); switch (TLI.getOperationAction(ISD::STORE, VT)) { - default: assert(0 && "This action is not supported yet!"); + default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Legal: // If this is an unaligned store and the target doesn't support it, // expand it. 
- if (!TLI.allowsUnalignedMemoryAccesses()) { - unsigned ABIAlignment = TLI.getTargetData()-> - getABITypeAlignment(ST->getMemoryVT().getTypeForMVT()); + if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) { + const Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext()); + unsigned ABIAlignment = TLI.getTargetData()->getABITypeAlignment(Ty); if (ST->getAlignment() < ABIAlignment) - Result = ExpandUnalignedStore(cast<StoreSDNode>(Result.getNode()), DAG, - TLI); + Result = ExpandUnalignedStore(cast<StoreSDNode>(Result.getNode()), + DAG, TLI); } break; case TargetLowering::Custom: @@ -1391,14 +1370,14 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { } else { Tmp3 = LegalizeOp(ST->getValue()); - MVT StVT = ST->getMemoryVT(); + EVT StVT = ST->getMemoryVT(); unsigned StWidth = StVT.getSizeInBits(); if (StWidth != StVT.getStoreSizeInBits()) { // Promote to a byte-sized store with upper bits zero if not // storing an integral number of bytes. For example, promote // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1) - MVT NVT = MVT::getIntegerVT(StVT.getStoreSizeInBits()); + EVT NVT = EVT::getIntegerVT(*DAG.getContext(), StVT.getStoreSizeInBits()); Tmp3 = DAG.getZeroExtendInReg(Tmp3, dl, StVT); Result = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(), SVOffset, NVT, isVolatile, Alignment); @@ -1412,8 +1391,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { assert(ExtraWidth < RoundWidth); assert(!(RoundWidth % 8) && !(ExtraWidth % 8) && "Store size not an integral number of bytes!"); - MVT RoundVT = MVT::getIntegerVT(RoundWidth); - MVT ExtraVT = MVT::getIntegerVT(ExtraWidth); + EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth); + EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth); SDValue Lo, Hi; unsigned IncrementSize; @@ -1460,16 +1439,16 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { ST->getOffset()); switch (TLI.getTruncStoreAction(ST->getValue().getValueType(), StVT)) { - default: assert(0 && "This 
action is not supported yet!"); + default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Legal: // If this is an unaligned store and the target doesn't support it, // expand it. - if (!TLI.allowsUnalignedMemoryAccesses()) { - unsigned ABIAlignment = TLI.getTargetData()-> - getABITypeAlignment(ST->getMemoryVT().getTypeForMVT()); + if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) { + const Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext()); + unsigned ABIAlignment = TLI.getTargetData()->getABITypeAlignment(Ty); if (ST->getAlignment() < ABIAlignment) - Result = ExpandUnalignedStore(cast<StoreSDNode>(Result.getNode()), DAG, - TLI); + Result = ExpandUnalignedStore(cast<StoreSDNode>(Result.getNode()), + DAG, TLI); } break; case TargetLowering::Custom: @@ -1522,7 +1501,11 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { StackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, StackPtr); - return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, NULL, 0); + if (Op.getValueType().isVector()) + return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, NULL, 0); + else + return DAG.getExtLoad(ISD::EXTLOAD, dl, Op.getValueType(), Ch, StackPtr, + NULL, 0, Vec.getValueType().getVectorElementType()); } SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { @@ -1530,8 +1513,8 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { // aligned object on the stack, store each element into it, then load // the result as a vector. // Create the stack frame object. 
- MVT VT = Node->getValueType(0); - MVT OpVT = Node->getOperand(0).getValueType(); + EVT VT = Node->getValueType(0); + EVT OpVT = Node->getOperand(0).getValueType(); DebugLoc dl = Node->getDebugLoc(); SDValue FIPtr = DAG.CreateStackTemporary(VT); int FI = cast<FrameIndexSDNode>(FIPtr.getNode())->getIndex(); @@ -1574,7 +1557,7 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) { "Ugly special-cased code!"); // Get the sign bit of the RHS. SDValue SignBit; - MVT IVT = Tmp2.getValueType() == MVT::f64 ? MVT::i64 : MVT::i32; + EVT IVT = Tmp2.getValueType() == MVT::f64 ? MVT::i64 : MVT::i32; if (isTypeLegal(IVT)) { SignBit = DAG.getNode(ISD::BIT_CONVERT, dl, IVT, Tmp2); } else { @@ -1613,9 +1596,8 @@ SDValue SelectionDAGLegalize::ExpandDBG_STOPPOINT(SDNode* Node) { bool useLABEL = TLI.isOperationLegalOrCustom(ISD::DBG_LABEL, MVT::Other); const DbgStopPointSDNode *DSP = cast<DbgStopPointSDNode>(Node); - GlobalVariable *CU_GV = cast<GlobalVariable>(DSP->getCompileUnit()); - if (DW && (useDEBUG_LOC || useLABEL) && !CU_GV->isDeclaration()) { - DICompileUnit CU(cast<GlobalVariable>(DSP->getCompileUnit())); + MDNode *CU_Node = DSP->getCompileUnit(); + if (DW && (useDEBUG_LOC || useLABEL)) { unsigned Line = DSP->getLine(); unsigned Col = DSP->getColumn(); @@ -1627,9 +1609,9 @@ SDValue SelectionDAGLegalize::ExpandDBG_STOPPOINT(SDNode* Node) { return DAG.getNode(ISD::DEBUG_LOC, dl, MVT::Other, Node->getOperand(0), DAG.getConstant(Line, MVT::i32), DAG.getConstant(Col, MVT::i32), - DAG.getSrcValue(CU.getGV())); + DAG.getSrcValue(CU_Node)); } else { - unsigned ID = DW->RecordSourceLine(Line, Col, CU); + unsigned ID = DW->RecordSourceLine(Line, Col, CU_Node); return DAG.getLabel(ISD::DBG_LABEL, dl, Node->getOperand(0), ID); } } @@ -1643,7 +1625,7 @@ void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node, assert(SPReg && "Target cannot require DYNAMIC_STACKALLOC expansion and" " not tell us which reg is the stack pointer!"); DebugLoc dl = Node->getDebugLoc(); - 
MVT VT = Node->getValueType(0); + EVT VT = Node->getValueType(0); SDValue Tmp1 = SDValue(Node, 0); SDValue Tmp2 = SDValue(Node, 1); SDValue Tmp3 = Node->getOperand(2); @@ -1676,14 +1658,14 @@ void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node, /// condition code CC on the current target. This routine assumes LHS and rHS /// have already been legalized by LegalizeSetCCOperands. It expands SETCC with /// illegal condition code into AND / OR of multiple SETCC values. -void SelectionDAGLegalize::LegalizeSetCCCondCode(MVT VT, +void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, DebugLoc dl) { - MVT OpVT = LHS.getValueType(); + EVT OpVT = LHS.getValueType(); ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get(); switch (TLI.getCondCodeAction(CCCode, OpVT)) { - default: assert(0 && "Unknown condition code action!"); + default: llvm_unreachable("Unknown condition code action!"); case TargetLowering::Legal: // Nothing to do. break; @@ -1691,7 +1673,7 @@ void SelectionDAGLegalize::LegalizeSetCCCondCode(MVT VT, ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID; unsigned Opc = 0; switch (CCCode) { - default: assert(0 && "Don't know how to expand this condition!"); abort(); + default: llvm_unreachable("Don't know how to expand this condition!"); case ISD::SETOEQ: CC1 = ISD::SETEQ; CC2 = ISD::SETO; Opc = ISD::AND; break; case ISD::SETOGT: CC1 = ISD::SETGT; CC2 = ISD::SETO; Opc = ISD::AND; break; case ISD::SETOGE: CC1 = ISD::SETGE; CC2 = ISD::SETO; Opc = ISD::AND; break; @@ -1722,13 +1704,13 @@ void SelectionDAGLegalize::LegalizeSetCCCondCode(MVT VT, /// a load from the stack slot to DestVT, extending it if needed. /// The resultant code need not be legal. SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, - MVT SlotVT, - MVT DestVT, + EVT SlotVT, + EVT DestVT, DebugLoc dl) { // Create the stack frame object. unsigned SrcAlign = TLI.getTargetData()->getPrefTypeAlignment(SrcOp.getValueType(). 
- getTypeForMVT()); + getTypeForEVT(*DAG.getContext())); SDValue FIPtr = DAG.CreateStackTemporary(SlotVT, SrcAlign); FrameIndexSDNode *StackPtrFI = cast<FrameIndexSDNode>(FIPtr); @@ -1739,7 +1721,7 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, unsigned SlotSize = SlotVT.getSizeInBits(); unsigned DestSize = DestVT.getSizeInBits(); unsigned DestAlign = - TLI.getTargetData()->getPrefTypeAlignment(DestVT.getTypeForMVT()); + TLI.getTargetData()->getPrefTypeAlignment(DestVT.getTypeForEVT(*DAG.getContext())); // Emit a store to the stack slot. Use a truncstore if the input value is // later than DestVT. @@ -1787,9 +1769,9 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) { unsigned NumElems = Node->getNumOperands(); SDValue Value1, Value2; DebugLoc dl = Node->getDebugLoc(); - MVT VT = Node->getValueType(0); - MVT OpVT = Node->getOperand(0).getValueType(); - MVT EltVT = VT.getVectorElementType(); + EVT VT = Node->getValueType(0); + EVT OpVT = Node->getOperand(0).getValueType(); + EVT EltVT = VT.getVectorElementType(); // If the only non-undef value is the low element, turn this into a // SCALAR_TO_VECTOR node. If this is { X, X, X, X }, determine X. 
@@ -1833,7 +1815,7 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) { CV.push_back(const_cast<ConstantInt *>(V->getConstantIntValue())); } else { assert(Node->getOperand(i).getOpcode() == ISD::UNDEF); - const Type *OpNTy = OpVT.getTypeForMVT(); + const Type *OpNTy = OpVT.getTypeForEVT(*DAG.getContext()); CV.push_back(UndefValue::get(OpNTy)); } } @@ -1886,8 +1868,8 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) { - MVT ArgVT = Node->getOperand(i).getValueType(); - const Type *ArgTy = ArgVT.getTypeForMVT(); + EVT ArgVT = Node->getOperand(i).getValueType(); + const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); Entry.Node = Node->getOperand(i); Entry.Ty = ArgTy; Entry.isSExt = isSigned; Entry.isZExt = !isSigned; @@ -1897,10 +1879,12 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, TLI.getPointerTy()); // Splice the libcall in wherever FindInputOutputChains tells us to. - const Type *RetTy = Node->getValueType(0).getTypeForMVT(); + const Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, - 0, CallingConv::C, false, Callee, Args, DAG, + 0, TLI.getLibcallCallingConv(LC), false, + /*isReturnValueUsed=*/true, + Callee, Args, DAG, Node->getDebugLoc()); // Legalize the call sequence, starting with the chain. 
This will advance @@ -1916,8 +1900,8 @@ SDValue SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node, RTLIB::Libcall Call_F80, RTLIB::Libcall Call_PPCF128) { RTLIB::Libcall LC; - switch (Node->getValueType(0).getSimpleVT()) { - default: assert(0 && "Unexpected request for libcall!"); + switch (Node->getValueType(0).getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unexpected request for libcall!"); case MVT::f32: LC = Call_F32; break; case MVT::f64: LC = Call_F64; break; case MVT::f80: LC = Call_F80; break; @@ -1932,8 +1916,8 @@ SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned, RTLIB::Libcall Call_I64, RTLIB::Libcall Call_I128) { RTLIB::Libcall LC; - switch (Node->getValueType(0).getSimpleVT()) { - default: assert(0 && "Unexpected request for libcall!"); + switch (Node->getValueType(0).getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unexpected request for libcall!"); case MVT::i16: LC = Call_I16; break; case MVT::i32: LC = Call_I32; break; case MVT::i64: LC = Call_I64; break; @@ -1948,7 +1932,7 @@ SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned, /// legal for the target. SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0, - MVT DestVT, + EVT DestVT, DebugLoc dl) { if (Op0.getValueType() == MVT::i32) { // simple 32-bit [signed|unsigned] integer to float/double expansion @@ -2018,15 +2002,16 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, // as a negative number. To counteract this, the dynamic code adds an // offset depending on the data type. 
uint64_t FF; - switch (Op0.getValueType().getSimpleVT()) { - default: assert(0 && "Unsupported integer type!"); + switch (Op0.getValueType().getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unsupported integer type!"); case MVT::i8 : FF = 0x43800000ULL; break; // 2^8 (as a float) case MVT::i16: FF = 0x47800000ULL; break; // 2^16 (as a float) case MVT::i32: FF = 0x4F800000ULL; break; // 2^32 (as a float) case MVT::i64: FF = 0x5F800000ULL; break; // 2^64 (as a float) } if (TLI.isLittleEndian()) FF <<= 32; - Constant *FudgeFactor = ConstantInt::get(Type::Int64Ty, FF); + Constant *FudgeFactor = ConstantInt::get( + Type::getInt64Ty(*DAG.getContext()), FF); SDValue CPIdx = DAG.getConstantPool(FudgeFactor, TLI.getPointerTy()); unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); @@ -2054,17 +2039,17 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, /// legal for the target, and that there is a legal UINT_TO_FP or SINT_TO_FP /// operation that takes a larger input. SDValue SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDValue LegalOp, - MVT DestVT, + EVT DestVT, bool isSigned, DebugLoc dl) { // First step, figure out the appropriate *INT_TO_FP operation to use. - MVT NewInTy = LegalOp.getValueType(); + EVT NewInTy = LegalOp.getValueType(); unsigned OpToUse = 0; // Scan for the appropriate larger type to use. while (1) { - NewInTy = (MVT::SimpleValueType)(NewInTy.getSimpleVT()+1); + NewInTy = (MVT::SimpleValueType)(NewInTy.getSimpleVT().SimpleTy+1); assert(NewInTy.isInteger() && "Ran out of possibilities!"); // If the target supports SINT_TO_FP of this type, use it. @@ -2096,17 +2081,17 @@ SDValue SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDValue LegalOp, /// legal for the target, and that there is a legal FP_TO_UINT or FP_TO_SINT /// operation that returns a larger result. 
SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp, - MVT DestVT, + EVT DestVT, bool isSigned, DebugLoc dl) { // First step, figure out the appropriate FP_TO*INT operation to use. - MVT NewOutTy = DestVT; + EVT NewOutTy = DestVT; unsigned OpToUse = 0; // Scan for the appropriate larger type to use. while (1) { - NewOutTy = (MVT::SimpleValueType)(NewOutTy.getSimpleVT()+1); + NewOutTy = (MVT::SimpleValueType)(NewOutTy.getSimpleVT().SimpleTy+1); assert(NewOutTy.isInteger() && "Ran out of possibilities!"); if (TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NewOutTy)) { @@ -2134,11 +2119,11 @@ SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp, /// ExpandBSWAP - Open code the operations for BSWAP of the specified operation. /// SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, DebugLoc dl) { - MVT VT = Op.getValueType(); - MVT SHVT = TLI.getShiftAmountTy(); + EVT VT = Op.getValueType(); + EVT SHVT = TLI.getShiftAmountTy(); SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8; - switch (VT.getSimpleVT()) { - default: assert(0 && "Unhandled Expand type in BSWAP!"); abort(); + switch (VT.getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unhandled Expand type in BSWAP!"); case MVT::i16: Tmp2 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, SHVT)); Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, SHVT)); @@ -2183,15 +2168,15 @@ SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, DebugLoc dl) { SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, DebugLoc dl) { switch (Opc) { - default: assert(0 && "Cannot expand this yet!"); + default: llvm_unreachable("Cannot expand this yet!"); case ISD::CTPOP: { static const uint64_t mask[6] = { 0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL, 0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL }; - MVT VT = Op.getValueType(); - MVT ShVT = TLI.getShiftAmountTy(); + EVT VT = Op.getValueType(); + EVT ShVT = 
TLI.getShiftAmountTy(); unsigned len = VT.getSizeInBits(); for (unsigned i = 0; (1U << i) <= (len / 2); ++i) { //x = (x & mask[i][len/8]) + (x >> (1 << i) & mask[i][len/8]) @@ -2217,8 +2202,8 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, // return popcount(~x); // // but see also: http://www.hackersdelight.org/HDcode/nlz.cc - MVT VT = Op.getValueType(); - MVT ShVT = TLI.getShiftAmountTy(); + EVT VT = Op.getValueType(); + EVT ShVT = TLI.getShiftAmountTy(); unsigned len = VT.getSizeInBits(); for (unsigned i = 0; (1U << i) <= (len / 2); ++i) { SDValue Tmp3 = DAG.getConstant(1ULL << i, ShVT); @@ -2233,7 +2218,7 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, // unless the target has ctlz but not ctpop, in which case we use: // { return 32 - nlz(~x & (x-1)); } // see also http://www.hackersdelight.org/HDcode/ntz.cc - MVT VT = Op.getValueType(); + EVT VT = Op.getValueType(); SDValue Tmp3 = DAG.getNode(ISD::AND, dl, VT, DAG.getNOT(dl, Op, VT), DAG.getNode(ISD::SUB, dl, VT, Op, @@ -2272,7 +2257,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, Results.push_back(DAG.getConstant(1, Node->getValueType(0))); break; case ISD::EH_RETURN: - case ISD::DECLARE: case ISD::DBG_LABEL: case ISD::EH_LABEL: case ISD::PREFETCH: @@ -2291,21 +2275,22 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, Results.push_back(Node->getOperand(i)); break; case ISD::UNDEF: { - MVT VT = Node->getValueType(0); + EVT VT = Node->getValueType(0); if (VT.isInteger()) Results.push_back(DAG.getConstant(0, VT)); else if (VT.isFloatingPoint()) Results.push_back(DAG.getConstantFP(0, VT)); else - assert(0 && "Unknown value type!"); + llvm_unreachable("Unknown value type!"); break; } case ISD::TRAP: { // If this operation is not supported, lower it to 'abort()' call TargetLowering::ArgListTy Args; std::pair<SDValue, SDValue> CallResult = - TLI.LowerCallTo(Node->getOperand(0), Type::VoidTy, + TLI.LowerCallTo(Node->getOperand(0), 
Type::getVoidTy(*DAG.getContext()), false, false, false, false, 0, CallingConv::C, false, + /*isReturnValueUsed=*/true, DAG.getExternalSymbol("abort", TLI.getPointerTy()), Args, DAG, dl); Results.push_back(CallResult.second); @@ -2326,7 +2311,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, case ISD::SIGN_EXTEND_INREG: { // NOTE: we could fall back on load/store here too for targets without // SAR. However, it is doubtful that any exist. - MVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT(); + EVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT(); unsigned BitsDiff = Node->getValueType(0).getSizeInBits() - ExtraVT.getSizeInBits(); SDValue ShiftCst = DAG.getConstant(BitsDiff, TLI.getShiftAmountTy()); @@ -2343,7 +2328,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, // NOTE: there is a choice here between constantly creating new stack // slots and always reusing the same one. We currently always create // new ones, as reuse may inhibit scheduling. - MVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT(); + EVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT(); Tmp1 = EmitStackConvert(Node->getOperand(0), ExtraVT, Node->getValueType(0), dl); Results.push_back(Tmp1); @@ -2357,8 +2342,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, break; case ISD::FP_TO_UINT: { SDValue True, False; - MVT VT = Node->getOperand(0).getValueType(); - MVT NVT = Node->getValueType(0); + EVT VT = Node->getOperand(0).getValueType(); + EVT NVT = Node->getValueType(0); const uint64_t zero[] = {0, 0}; APFloat apf = APFloat(APInt(VT.getSizeInBits(), 2, zero)); APInt x = APInt::getSignBit(NVT.getSizeInBits()); @@ -2379,14 +2364,14 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, } case ISD::VAARG: { const Value *V = cast<SrcValueSDNode>(Node->getOperand(2))->getValue(); - MVT VT = Node->getValueType(0); + EVT VT = Node->getValueType(0); Tmp1 = Node->getOperand(0); Tmp2 = Node->getOperand(1); SDValue VAList = DAG.getLoad(TLI.getPointerTy(), dl, 
Tmp1, Tmp2, V, 0); // Increment the pointer, VAList, to the next vaarg Tmp3 = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), VAList, DAG.getConstant(TLI.getTargetData()-> - getTypeAllocSize(VT.getTypeForMVT()), + getTypeAllocSize(VT.getTypeForEVT(*DAG.getContext())), TLI.getPointerTy())); // Store the incremented VAList to the legalized pointer Tmp3 = DAG.getStore(VAList.getValue(1), dl, Tmp3, Tmp2, V, 0); @@ -2434,8 +2419,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, SmallVector<int, 8> Mask; cast<ShuffleVectorSDNode>(Node)->getMask(Mask); - MVT VT = Node->getValueType(0); - MVT EltVT = VT.getVectorElementType(); + EVT VT = Node->getValueType(0); + EVT EltVT = VT.getVectorElementType(); unsigned NumElems = VT.getVectorNumElements(); SmallVector<SDValue, 8> Ops; for (unsigned i = 0; i != NumElems; ++i) { @@ -2458,7 +2443,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, break; } case ISD::EXTRACT_ELEMENT: { - MVT OpTy = Node->getOperand(0).getValueType(); + EVT OpTy = Node->getOperand(0).getValueType(); if (cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue()) { // 1 -> Hi Tmp1 = DAG.getNode(ISD::SRL, dl, OpTy, Node->getOperand(0), @@ -2507,7 +2492,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, break; case ISD::FABS: { // Expand Y = FABS(X) -> Y = (X >u 0.0) ? X : fneg(X). 
- MVT VT = Node->getValueType(0); + EVT VT = Node->getValueType(0); Tmp1 = Node->getOperand(0); Tmp2 = DAG.getConstantFP(0.0, VT); Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(Tmp1.getValueType()), @@ -2622,7 +2607,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, break; } case ISD::SUB: { - MVT VT = Node->getValueType(0); + EVT VT = Node->getValueType(0); assert(TLI.isOperationLegalOrCustom(ISD::ADD, VT) && TLI.isOperationLegalOrCustom(ISD::XOR, VT) && "Don't know how to expand this subtraction!"); @@ -2634,7 +2619,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, } case ISD::UREM: case ISD::SREM: { - MVT VT = Node->getValueType(0); + EVT VT = Node->getValueType(0); SDVTList VTs = DAG.getVTList(VT, VT); bool isSigned = Node->getOpcode() == ISD::SREM; unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV; @@ -2662,7 +2647,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, case ISD::SDIV: { bool isSigned = Node->getOpcode() == ISD::SDIV; unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM; - MVT VT = Node->getValueType(0); + EVT VT = Node->getValueType(0); SDVTList VTs = DAG.getVTList(VT, VT); if (TLI.isOperationLegalOrCustom(DivRemOpc, VT)) Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Node->getOperand(0), @@ -2680,7 +2665,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, case ISD::MULHS: { unsigned ExpandOpcode = Node->getOpcode() == ISD::MULHU ? ISD::UMUL_LOHI : ISD::SMUL_LOHI; - MVT VT = Node->getValueType(0); + EVT VT = Node->getValueType(0); SDVTList VTs = DAG.getVTList(VT, VT); assert(TLI.isOperationLegalOrCustom(ExpandOpcode, VT) && "If this wasn't legal, it shouldn't have been created!"); @@ -2690,7 +2675,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, break; } case ISD::MUL: { - MVT VT = Node->getValueType(0); + EVT VT = Node->getValueType(0); SDVTList VTs = DAG.getVTList(VT, VT); // See if multiply or divide can be lowered using two-result operations. 
// We just need the low half of the multiply; try both the signed @@ -2729,7 +2714,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, ISD::ADD : ISD::SUB, dl, LHS.getValueType(), LHS, RHS); Results.push_back(Sum); - MVT OType = Node->getValueType(1); + EVT OType = Node->getValueType(1); SDValue Zero = DAG.getConstant(0, LHS.getValueType()); @@ -2770,7 +2755,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, } case ISD::UMULO: case ISD::SMULO: { - MVT VT = Node->getValueType(0); + EVT VT = Node->getValueType(0); SDValue LHS = Node->getOperand(0); SDValue RHS = Node->getOperand(1); SDValue BottomHalf; @@ -2786,8 +2771,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS, RHS); TopHalf = BottomHalf.getValue(1); - } else if (TLI.isTypeLegal(MVT::getIntegerVT(VT.getSizeInBits() * 2))) { - MVT WideVT = MVT::getIntegerVT(VT.getSizeInBits() * 2); + } else if (TLI.isTypeLegal(EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2))) { + EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2); LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS); RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS); Tmp1 = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS); @@ -2800,7 +2785,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, // type in some cases cases. // Also, we can fall back to a division in some cases, but that's a big // performance hit in the general case. 
- assert(0 && "Don't know how to expand this operation yet!"); + llvm_unreachable("Don't know how to expand this operation yet!"); } if (isSigned) { Tmp1 = DAG.getConstant(VT.getSizeInBits() - 1, TLI.getShiftAmountTy()); @@ -2816,7 +2801,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, break; } case ISD::BUILD_PAIR: { - MVT PairTy = Node->getValueType(0); + EVT PairTy = Node->getValueType(0); Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, PairTy, Node->getOperand(0)); Tmp2 = DAG.getNode(ISD::ANY_EXTEND, dl, PairTy, Node->getOperand(1)); Tmp2 = DAG.getNode(ISD::SHL, dl, PairTy, Tmp2, @@ -2845,14 +2830,14 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, SDValue Table = Node->getOperand(1); SDValue Index = Node->getOperand(2); - MVT PTy = TLI.getPointerTy(); + EVT PTy = TLI.getPointerTy(); MachineFunction &MF = DAG.getMachineFunction(); unsigned EntrySize = MF.getJumpTableInfo()->getEntrySize(); Index= DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(EntrySize, PTy)); SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table); - MVT MemVT = MVT::getIntegerVT(EntrySize * 8); + EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), EntrySize * 8); SDValue LD = DAG.getExtLoad(ISD::SEXTLOAD, dl, PTy, Chain, Addr, PseudoSourceValue::getJumpTable(), 0, MemVT); Addr = LD; @@ -2899,7 +2884,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, // Otherwise, SETCC for the given comparison type must be completely // illegal; expand it into a SELECT_CC. 
- MVT VT = Node->getValueType(0); + EVT VT = Node->getValueType(0); Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, VT, Tmp1, Tmp2, DAG.getConstant(1, VT), DAG.getConstant(0, VT), Tmp3); Results.push_back(Tmp1); @@ -2958,12 +2943,13 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, } void SelectionDAGLegalize::PromoteNode(SDNode *Node, SmallVectorImpl<SDValue> &Results) { - MVT OVT = Node->getValueType(0); + EVT OVT = Node->getValueType(0); if (Node->getOpcode() == ISD::UINT_TO_FP || - Node->getOpcode() == ISD::SINT_TO_FP) { + Node->getOpcode() == ISD::SINT_TO_FP || + Node->getOpcode() == ISD::SETCC) { OVT = Node->getOperand(0).getValueType(); } - MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), OVT); + EVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), OVT); DebugLoc dl = Node->getDebugLoc(); SDValue Tmp1, Tmp2, Tmp3; switch (Node->getOpcode()) { @@ -2973,10 +2959,10 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node, // Zero extend the argument. Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0)); // Perform the larger operation. 
- Tmp1 = DAG.getNode(Node->getOpcode(), dl, Node->getValueType(0), Tmp1); + Tmp1 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1); if (Node->getOpcode() == ISD::CTTZ) { //if Tmp1 == sizeinbits(NVT) then Tmp1 = sizeinbits(Old VT) - Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(Tmp1.getValueType()), + Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Tmp1, DAG.getConstant(NVT.getSizeInBits(), NVT), ISD::SETEQ); Tmp1 = DAG.getNode(ISD::SELECT, dl, NVT, Tmp2, @@ -2987,7 +2973,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node, DAG.getConstant(NVT.getSizeInBits() - OVT.getSizeInBits(), NVT)); } - Results.push_back(Tmp1); + Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, OVT, Tmp1)); break; case ISD::BSWAP: { unsigned DiffBits = NVT.getSizeInBits() - OVT.getSizeInBits(); @@ -3012,16 +2998,26 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node, break; case ISD::AND: case ISD::OR: - case ISD::XOR: - assert(OVT.isVector() && "Don't know how to promote scalar logic ops"); - // Bit convert each of the values to the new type. - Tmp1 = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, Node->getOperand(0)); - Tmp2 = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, Node->getOperand(1)); + case ISD::XOR: { + unsigned ExtOp, TruncOp; + if (OVT.isVector()) { + ExtOp = ISD::BIT_CONVERT; + TruncOp = ISD::BIT_CONVERT; + } else if (OVT.isInteger()) { + ExtOp = ISD::ANY_EXTEND; + TruncOp = ISD::TRUNCATE; + } else { + llvm_report_error("Cannot promote logic operation"); + } + // Promote each of the values to the new type. + Tmp1 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(0)); + Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(1)); + // Perform the larger operation, then convert back Tmp1 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2); - // Bit convert the result back the original type. 
- Results.push_back(DAG.getNode(ISD::BIT_CONVERT, dl, OVT, Tmp1)); + Results.push_back(DAG.getNode(TruncOp, dl, OVT, Tmp1)); break; - case ISD::SELECT: + } + case ISD::SELECT: { unsigned ExtOp, TruncOp; if (Node->getValueType(0).isVector()) { ExtOp = ISD::BIT_CONVERT; @@ -3046,6 +3042,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node, DAG.getIntPtrConstant(0)); Results.push_back(Tmp1); break; + } case ISD::VECTOR_SHUFFLE: { SmallVector<int, 8> Mask; cast<ShuffleVectorSDNode>(Node)->getMask(Mask); @@ -3061,31 +3058,14 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node, break; } case ISD::SETCC: { - // First step, figure out the appropriate operation to use. - // Allow SETCC to not be supported for all legal data types - // Mostly this targets FP - MVT NewInTy = Node->getOperand(0).getValueType(); - MVT OldVT = NewInTy; OldVT = OldVT; - - // Scan for the appropriate larger type to use. - while (1) { - NewInTy = (MVT::SimpleValueType)(NewInTy.getSimpleVT()+1); - - assert(NewInTy.isInteger() == OldVT.isInteger() && - "Fell off of the edge of the integer world"); - assert(NewInTy.isFloatingPoint() == OldVT.isFloatingPoint() && - "Fell off of the edge of the floating point world"); - - // If the target supports SETCC of this type, use it. - if (TLI.isOperationLegalOrCustom(ISD::SETCC, NewInTy)) - break; - } - if (NewInTy.isInteger()) - assert(0 && "Cannot promote Legal Integer SETCC yet"); - else { - Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NewInTy, Tmp1); - Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NewInTy, Tmp2); + unsigned ExtOp = ISD::FP_EXTEND; + if (NVT.isInteger()) { + ISD::CondCode CCCode = + cast<CondCodeSDNode>(Node->getOperand(2))->get(); + ExtOp = isSignedIntSetCC(CCCode) ? 
ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; } + Tmp1 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(0)); + Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(1)); Results.push_back(DAG.getNode(ISD::SETCC, dl, Node->getValueType(0), Tmp1, Tmp2, Node->getOperand(2))); break; diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index c3c1bea..84e39b4 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -20,10 +20,12 @@ //===----------------------------------------------------------------------===// #include "LegalizeTypes.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; /// GetFPLibCall - Return the right libcall for the given floating point type. -static RTLIB::Libcall GetFPLibCall(MVT VT, +static RTLIB::Libcall GetFPLibCall(EVT VT, RTLIB::Libcall Call_F32, RTLIB::Libcall Call_F64, RTLIB::Libcall Call_F80, @@ -41,18 +43,17 @@ static RTLIB::Libcall GetFPLibCall(MVT VT, //===----------------------------------------------------------------------===// void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { - DEBUG(cerr << "Soften float result " << ResNo << ": "; N->dump(&DAG); - cerr << "\n"); + DEBUG(errs() << "Soften float result " << ResNo << ": "; N->dump(&DAG); + errs() << "\n"); SDValue R = SDValue(); switch (N->getOpcode()) { default: #ifndef NDEBUG - cerr << "SoftenFloatResult #" << ResNo << ": "; - N->dump(&DAG); cerr << "\n"; + errs() << "SoftenFloatResult #" << ResNo << ": "; + N->dump(&DAG); errs() << "\n"; #endif - assert(0 && "Do not know how to soften the result of this operator!"); - abort(); + llvm_unreachable("Do not know how to soften the result of this operator!"); case ISD::BIT_CONVERT: R = SoftenFloatRes_BIT_CONVERT(N); break; case ISD::BUILD_PAIR: R = SoftenFloatRes_BUILD_PAIR(N); break; @@ -107,14 +108,14 @@ SDValue 
DAGTypeLegalizer::SoftenFloatRes_BIT_CONVERT(SDNode *N) { SDValue DAGTypeLegalizer::SoftenFloatRes_BUILD_PAIR(SDNode *N) { // Convert the inputs to integers, and build a new pair out of them. return DAG.getNode(ISD::BUILD_PAIR, N->getDebugLoc(), - TLI.getTypeToTransformTo(N->getValueType(0)), + TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)), BitConvertToInteger(N->getOperand(0)), BitConvertToInteger(N->getOperand(1))); } SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(ConstantFPSDNode *N) { return DAG.getConstant(N->getValueAPF().bitcastToAPInt(), - TLI.getTypeToTransformTo(N->getValueType(0))); + TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0))); } SDValue DAGTypeLegalizer::SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N) { @@ -125,7 +126,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); unsigned Size = NVT.getSizeInBits(); // Mask = ~(1 << (Size-1)) @@ -136,7 +137,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FADD(SDNode *N) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), GetSoftenedFloat(N->getOperand(1)) }; return MakeLibCall(GetFPLibCall(N->getValueType(0), @@ -148,7 +149,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FADD(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FCEIL(SDNode *N) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); return MakeLibCall(GetFPLibCall(N->getValueType(0), RTLIB::CEIL_F32, @@ -163,8 +164,8 @@ SDValue 
DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N) { SDValue RHS = BitConvertToInteger(N->getOperand(1)); DebugLoc dl = N->getDebugLoc(); - MVT LVT = LHS.getValueType(); - MVT RVT = RHS.getValueType(); + EVT LVT = LHS.getValueType(); + EVT RVT = RHS.getValueType(); unsigned LSize = LVT.getSizeInBits(); unsigned RSize = RVT.getSizeInBits(); @@ -199,7 +200,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FCOS(SDNode *N) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); return MakeLibCall(GetFPLibCall(N->getValueType(0), RTLIB::COS_F32, @@ -210,7 +211,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FCOS(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FDIV(SDNode *N) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), GetSoftenedFloat(N->getOperand(1)) }; return MakeLibCall(GetFPLibCall(N->getValueType(0), @@ -222,7 +223,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FDIV(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP(SDNode *N) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); return MakeLibCall(GetFPLibCall(N->getValueType(0), RTLIB::EXP_F32, @@ -233,7 +234,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP2(SDNode *N) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); return MakeLibCall(GetFPLibCall(N->getValueType(0), RTLIB::EXP2_F32, @@ -244,7 +245,7 @@ SDValue 
DAGTypeLegalizer::SoftenFloatRes_FEXP2(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FFLOOR(SDNode *N) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); return MakeLibCall(GetFPLibCall(N->getValueType(0), RTLIB::FLOOR_F32, @@ -255,7 +256,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FFLOOR(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG(SDNode *N) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); return MakeLibCall(GetFPLibCall(N->getValueType(0), RTLIB::LOG_F32, @@ -266,7 +267,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG2(SDNode *N) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); return MakeLibCall(GetFPLibCall(N->getValueType(0), RTLIB::LOG2_F32, @@ -277,7 +278,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG2(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG10(SDNode *N) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); return MakeLibCall(GetFPLibCall(N->getValueType(0), RTLIB::LOG10_F32, @@ -288,7 +289,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG10(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), GetSoftenedFloat(N->getOperand(1)) }; return MakeLibCall(GetFPLibCall(N->getValueType(0), @@ -300,7 +301,7 @@ 
SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FNEARBYINT(SDNode *N) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); return MakeLibCall(GetFPLibCall(N->getValueType(0), RTLIB::NEARBYINT_F32, @@ -311,7 +312,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FNEARBYINT(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); // Expand Y = FNEG(X) -> Y = SUB -0.0, X SDValue Ops[2] = { DAG.getConstantFP(-0.0, N->getValueType(0)), GetSoftenedFloat(N->getOperand(0)) }; @@ -324,7 +325,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = N->getOperand(0); RTLIB::Libcall LC = RTLIB::getFPEXT(Op.getValueType(), N->getValueType(0)); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!"); @@ -332,7 +333,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = N->getOperand(0); RTLIB::Libcall LC = RTLIB::getFPROUND(Op.getValueType(), N->getValueType(0)); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND!"); @@ -340,7 +341,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FPOW(SDNode *N) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); 
SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), GetSoftenedFloat(N->getOperand(1)) }; return MakeLibCall(GetFPLibCall(N->getValueType(0), @@ -354,7 +355,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FPOW(SDNode *N) { SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) { assert(N->getOperand(1).getValueType() == MVT::i32 && "Unsupported power type!"); - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), N->getOperand(1) }; return MakeLibCall(GetFPLibCall(N->getValueType(0), RTLIB::POWI_F32, @@ -365,7 +366,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), GetSoftenedFloat(N->getOperand(1)) }; return MakeLibCall(GetFPLibCall(N->getValueType(0), @@ -377,7 +378,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FRINT(SDNode *N) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); return MakeLibCall(GetFPLibCall(N->getValueType(0), RTLIB::RINT_F32, @@ -388,7 +389,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FRINT(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); return MakeLibCall(GetFPLibCall(N->getValueType(0), RTLIB::SIN_F32, @@ -399,7 +400,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FSQRT(SDNode *N) { - MVT 
NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); return MakeLibCall(GetFPLibCall(N->getValueType(0), RTLIB::SQRT_F32, @@ -410,7 +411,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSQRT(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), GetSoftenedFloat(N->getOperand(1)) }; return MakeLibCall(GetFPLibCall(N->getValueType(0), @@ -422,7 +423,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); return MakeLibCall(GetFPLibCall(N->getValueType(0), RTLIB::TRUNC_F32, @@ -434,8 +435,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) { SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { LoadSDNode *L = cast<LoadSDNode>(N); - MVT VT = N->getValueType(0); - MVT NVT = TLI.getTypeToTransformTo(VT); + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); DebugLoc dl = N->getDebugLoc(); SDValue NewL; @@ -479,19 +480,19 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT_CC(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_UNDEF(SDNode *N) { - return DAG.getUNDEF(TLI.getTypeToTransformTo(N->getValueType(0))); + return DAG.getUNDEF(TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0))); } SDValue DAGTypeLegalizer::SoftenFloatRes_VAARG(SDNode *N) { SDValue Chain = N->getOperand(0); // Get the chain. SDValue Ptr = N->getOperand(1); // Get the pointer. 
- MVT VT = N->getValueType(0); - MVT NVT = TLI.getTypeToTransformTo(VT); + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); DebugLoc dl = N->getDebugLoc(); SDValue NewVAARG; NewVAARG = DAG.getVAArg(NVT, dl, Chain, Ptr, N->getOperand(2)); - + // Legalized the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), NewVAARG.getValue(1)); @@ -500,9 +501,9 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_VAARG(SDNode *N) { SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) { bool Signed = N->getOpcode() == ISD::SINT_TO_FP; - MVT SVT = N->getOperand(0).getValueType(); - MVT RVT = N->getValueType(0); - MVT NVT = MVT(); + EVT SVT = N->getOperand(0).getValueType(); + EVT RVT = N->getValueType(0); + EVT NVT = EVT(); DebugLoc dl = N->getDebugLoc(); // If the input is not legal, eg: i1 -> fp, then it needs to be promoted to @@ -521,7 +522,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) { // Sign/zero extend the argument if the libcall takes a larger type. SDValue Op = DAG.getNode(Signed ? 
ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl, NVT, N->getOperand(0)); - return MakeLibCall(LC, TLI.getTypeToTransformTo(RVT), &Op, 1, false, dl); + return MakeLibCall(LC, TLI.getTypeToTransformTo(*DAG.getContext(), RVT), &Op, 1, false, dl); } @@ -530,18 +531,17 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) { //===----------------------------------------------------------------------===// bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { - DEBUG(cerr << "Soften float operand " << OpNo << ": "; N->dump(&DAG); - cerr << "\n"); + DEBUG(errs() << "Soften float operand " << OpNo << ": "; N->dump(&DAG); + errs() << "\n"); SDValue Res = SDValue(); switch (N->getOpcode()) { default: #ifndef NDEBUG - cerr << "SoftenFloatOperand Op #" << OpNo << ": "; - N->dump(&DAG); cerr << "\n"; + errs() << "SoftenFloatOperand Op #" << OpNo << ": "; + N->dump(&DAG); errs() << "\n"; #endif - assert(0 && "Do not know how to soften this operator's operand!"); - abort(); + llvm_unreachable("Do not know how to soften this operator's operand!"); case ISD::BIT_CONVERT: Res = SoftenFloatOp_BIT_CONVERT(N); break; case ISD::BR_CC: Res = SoftenFloatOp_BR_CC(N); break; @@ -574,7 +574,7 @@ void DAGTypeLegalizer::SoftenSetCCOperands(SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, DebugLoc dl) { SDValue LHSInt = GetSoftenedFloat(NewLHS); SDValue RHSInt = GetSoftenedFloat(NewRHS); - MVT VT = NewLHS.getValueType(); + EVT VT = NewLHS.getValueType(); assert((VT == MVT::f32 || VT == MVT::f64) && "Unsupported setcc type!"); @@ -637,7 +637,7 @@ void DAGTypeLegalizer::SoftenSetCCOperands(SDValue &NewLHS, SDValue &NewRHS, } } - MVT RetVT = MVT::i32; // FIXME: is this the correct return type? + EVT RetVT = MVT::i32; // FIXME: is this the correct return type? 
SDValue Ops[2] = { LHSInt, RHSInt }; NewLHS = MakeLibCall(LC1, RetVT, Ops, 2, false/*sign irrelevant*/, dl); NewRHS = DAG.getConstant(0, RetVT); @@ -659,8 +659,8 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_BIT_CONVERT(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) { - MVT SVT = N->getOperand(0).getValueType(); - MVT RVT = N->getValueType(0); + EVT SVT = N->getOperand(0).getValueType(); + EVT RVT = N->getValueType(0); RTLIB::Libcall LC = RTLIB::getFPROUND(SVT, RVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND libcall"); @@ -688,7 +688,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_SINT(SDNode *N) { - MVT RVT = N->getValueType(0); + EVT RVT = N->getValueType(0); RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!"); SDValue Op = GetSoftenedFloat(N->getOperand(0)); @@ -696,7 +696,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_SINT(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_UINT(SDNode *N) { - MVT RVT = N->getValueType(0); + EVT RVT = N->getValueType(0); RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!"); SDValue Op = GetSoftenedFloat(N->getOperand(0)); @@ -767,7 +767,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_STORE(SDNode *N, unsigned OpNo) { /// have invalid operands or may have other results that need promotion, we just /// know that (at least) one result needs expansion. 
void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) { - DEBUG(cerr << "Expand float result: "; N->dump(&DAG); cerr << "\n"); + DEBUG(errs() << "Expand float result: "; N->dump(&DAG); errs() << "\n"); SDValue Lo, Hi; Lo = Hi = SDValue(); @@ -778,11 +778,10 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) { switch (N->getOpcode()) { default: #ifndef NDEBUG - cerr << "ExpandFloatResult #" << ResNo << ": "; - N->dump(&DAG); cerr << "\n"; + errs() << "ExpandFloatResult #" << ResNo << ": "; + N->dump(&DAG); errs() << "\n"; #endif - assert(0 && "Do not know how to expand the result of this operator!"); - abort(); + llvm_unreachable("Do not know how to expand the result of this operator!"); case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, Lo, Hi); break; case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break; @@ -830,7 +829,7 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) { void DAGTypeLegalizer::ExpandFloatRes_ConstantFP(SDNode *N, SDValue &Lo, SDValue &Hi) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); assert(NVT.getSizeInBits() == integerPartWidth && "Do not know how to expand this float constant!"); APInt C = cast<ConstantFPSDNode>(N)->getValueAPF().bitcastToAPInt(); @@ -982,7 +981,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FNEG(SDNode *N, SDValue &Lo, void DAGTypeLegalizer::ExpandFloatRes_FP_EXTEND(SDNode *N, SDValue &Lo, SDValue &Hi) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); Hi = DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), NVT, N->getOperand(0)); Lo = DAG.getConstantFP(APFloat(APInt(NVT.getSizeInBits(), 0)), NVT); } @@ -1067,7 +1066,7 @@ void DAGTypeLegalizer::ExpandFloatRes_LOAD(SDNode *N, SDValue &Lo, SDValue Ptr = LD->getBasePtr(); DebugLoc dl = N->getDebugLoc(); - MVT NVT = TLI.getTypeToTransformTo(LD->getValueType(0)); + 
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0)); assert(NVT.isByteSized() && "Expanded type not byte sized!"); assert(LD->getMemoryVT().bitsLE(NVT) && "Float type not round?"); @@ -1090,10 +1089,10 @@ void DAGTypeLegalizer::ExpandFloatRes_LOAD(SDNode *N, SDValue &Lo, void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, SDValue &Hi) { assert(N->getValueType(0) == MVT::ppcf128 && "Unsupported XINT_TO_FP!"); - MVT VT = N->getValueType(0); - MVT NVT = TLI.getTypeToTransformTo(VT); + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); SDValue Src = N->getOperand(0); - MVT SrcVT = Src.getValueType(); + EVT SrcVT = Src.getValueType(); bool isSigned = N->getOpcode() == ISD::SINT_TO_FP; DebugLoc dl = N->getDebugLoc(); @@ -1135,7 +1134,7 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, static const uint64_t TwoE128[] = { 0x47f0000000000000LL, 0 }; const uint64_t *Parts = 0; - switch (SrcVT.getSimpleVT()) { + switch (SrcVT.getSimpleVT().SimpleTy) { default: assert(false && "Unsupported UINT_TO_FP!"); case MVT::i32: @@ -1167,7 +1166,7 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, /// types of the node are known to be legal, but other operands of the node may /// need promotion or expansion as well as the specified one. 
bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) { - DEBUG(cerr << "Expand float operand: "; N->dump(&DAG); cerr << "\n"); + DEBUG(errs() << "Expand float operand: "; N->dump(&DAG); errs() << "\n"); SDValue Res = SDValue(); if (TLI.getOperationAction(N->getOpcode(), N->getOperand(OpNo).getValueType()) @@ -1178,11 +1177,10 @@ bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) { switch (N->getOpcode()) { default: #ifndef NDEBUG - cerr << "ExpandFloatOperand Op #" << OpNo << ": "; - N->dump(&DAG); cerr << "\n"; + errs() << "ExpandFloatOperand Op #" << OpNo << ": "; + N->dump(&DAG); errs() << "\n"; #endif - assert(0 && "Do not know how to expand this operator's operand!"); - abort(); + llvm_unreachable("Do not know how to expand this operator's operand!"); case ISD::BIT_CONVERT: Res = ExpandOp_BIT_CONVERT(N); break; case ISD::BUILD_VECTOR: Res = ExpandOp_BUILD_VECTOR(N); break; @@ -1224,7 +1222,7 @@ void DAGTypeLegalizer::FloatExpandSetCCOperands(SDValue &NewLHS, GetExpandedFloat(NewLHS, LHSLo, LHSHi); GetExpandedFloat(NewRHS, RHSLo, RHSHi); - MVT VT = NewLHS.getValueType(); + EVT VT = NewLHS.getValueType(); assert(VT == MVT::ppcf128 && "Unsupported setcc type!"); // FIXME: This generated code sucks. 
We want to generate @@ -1276,7 +1274,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_ROUND(SDNode *N) { } SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_SINT(SDNode *N) { - MVT RVT = N->getValueType(0); + EVT RVT = N->getValueType(0); DebugLoc dl = N->getDebugLoc(); // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on @@ -1297,7 +1295,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_SINT(SDNode *N) { } SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) { - MVT RVT = N->getValueType(0); + EVT RVT = N->getValueType(0); DebugLoc dl = N->getDebugLoc(); // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on @@ -1374,7 +1372,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_STORE(SDNode *N, unsigned OpNo) { SDValue Chain = ST->getChain(); SDValue Ptr = ST->getBasePtr(); - MVT NVT = TLI.getTypeToTransformTo(ST->getValue().getValueType()); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), ST->getValue().getValueType()); assert(NVT.isByteSized() && "Expanded type not byte sized!"); assert(ST->getMemoryVT().bitsLE(NVT) && "Float type not round?"); diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 0c826f6..8ac8063 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -20,6 +20,8 @@ #include "LegalizeTypes.h" #include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; //===----------------------------------------------------------------------===// @@ -31,7 +33,7 @@ using namespace llvm; /// may also have invalid operands or may have other results that need /// expansion, we just know that (at least) one result needs promotion. 
void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { - DEBUG(cerr << "Promote integer result: "; N->dump(&DAG); cerr << "\n"); + DEBUG(errs() << "Promote integer result: "; N->dump(&DAG); errs() << "\n"); SDValue Res = SDValue(); // See if the target wants to custom expand this node. @@ -41,11 +43,10 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { switch (N->getOpcode()) { default: #ifndef NDEBUG - cerr << "PromoteIntegerResult #" << ResNo << ": "; - N->dump(&DAG); cerr << "\n"; + errs() << "PromoteIntegerResult #" << ResNo << ": "; + N->dump(&DAG); errs() << "\n"; #endif - assert(0 && "Do not know how to promote this operator!"); - abort(); + llvm_unreachable("Do not know how to promote this operator!"); case ISD::AssertSext: Res = PromoteIntRes_AssertSext(N); break; case ISD::AssertZext: Res = PromoteIntRes_AssertZext(N); break; case ISD::BIT_CONVERT: Res = PromoteIntRes_BIT_CONVERT(N); break; @@ -161,10 +162,10 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Atomic2(AtomicSDNode *N) { SDValue DAGTypeLegalizer::PromoteIntRes_BIT_CONVERT(SDNode *N) { SDValue InOp = N->getOperand(0); - MVT InVT = InOp.getValueType(); - MVT NInVT = TLI.getTypeToTransformTo(InVT); - MVT OutVT = N->getValueType(0); - MVT NOutVT = TLI.getTypeToTransformTo(OutVT); + EVT InVT = InOp.getValueType(); + EVT NInVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT); + EVT OutVT = N->getValueType(0); + EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); DebugLoc dl = N->getDebugLoc(); switch (getTypeAction(InVT)) { @@ -201,7 +202,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BIT_CONVERT(SDNode *N) { std::swap(Lo, Hi); InOp = DAG.getNode(ISD::ANY_EXTEND, dl, - MVT::getIntegerVT(NOutVT.getSizeInBits()), + EVT::getIntegerVT(*DAG.getContext(), NOutVT.getSizeInBits()), JoinIntegers(Lo, Hi)); return DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, InOp); } @@ -211,24 +212,14 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BIT_CONVERT(SDNode *N) { return 
DAG.getNode(ISD::BIT_CONVERT, dl, OutVT, GetWidenedVector(InOp)); } - // Otherwise, lower the bit-convert to a store/load from the stack. - // Create the stack frame object. Make sure it is aligned for both - // the source and destination types. - SDValue FIPtr = DAG.CreateStackTemporary(InVT, OutVT); - int FI = cast<FrameIndexSDNode>(FIPtr.getNode())->getIndex(); - const Value *SV = PseudoSourceValue::getFixedStack(FI); - - // Emit a store to the stack slot. - SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, FIPtr, SV, 0); - - // Result is an extending load from the stack slot. - return DAG.getExtLoad(ISD::EXTLOAD, dl, NOutVT, Store, FIPtr, SV, 0, OutVT); + return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, + CreateStackStoreLoad(InOp, OutVT)); } SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) { SDValue Op = GetPromotedInteger(N->getOperand(0)); - MVT OVT = N->getValueType(0); - MVT NVT = Op.getValueType(); + EVT OVT = N->getValueType(0); + EVT NVT = Op.getValueType(); DebugLoc dl = N->getDebugLoc(); unsigned DiffBits = NVT.getSizeInBits() - OVT.getSizeInBits(); @@ -240,18 +231,18 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_PAIR(SDNode *N) { // The pair element type may be legal, or may not promote to the same type as // the result, for example i14 = BUILD_PAIR (i7, i7). Handle all cases. return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), - TLI.getTypeToTransformTo(N->getValueType(0)), + TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)), JoinIntegers(N->getOperand(0), N->getOperand(1))); } SDValue DAGTypeLegalizer::PromoteIntRes_Constant(SDNode *N) { - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // FIXME there is no actual debug info here DebugLoc dl = N->getDebugLoc(); // Zero extend things like i1, sign extend everything else. It shouldn't // matter in theory which one we pick, but this tends to give better code? unsigned Opc = VT.isByteSized() ? 
ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; - SDValue Result = DAG.getNode(Opc, dl, TLI.getTypeToTransformTo(VT), + SDValue Result = DAG.getNode(Opc, dl, TLI.getTypeToTransformTo(*DAG.getContext(), VT), SDValue(N, 0)); assert(isa<ConstantSDNode>(Result) && "Didn't constant fold ext?"); return Result; @@ -263,7 +254,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CONVERT_RNDSAT(SDNode *N) { CvtCode == ISD::CVT_US || CvtCode == ISD::CVT_UU || CvtCode == ISD::CVT_SF || CvtCode == ISD::CVT_UF) && "can only promote integers"); - MVT OutVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT OutVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); return DAG.getConvertRndSat(OutVT, N->getDebugLoc(), N->getOperand(0), N->getOperand(1), N->getOperand(2), N->getOperand(3), N->getOperand(4), CvtCode); @@ -273,8 +264,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTLZ(SDNode *N) { // Zero extend to the promoted type and do the count there. SDValue Op = ZExtPromotedInteger(N->getOperand(0)); DebugLoc dl = N->getDebugLoc(); - MVT OVT = N->getValueType(0); - MVT NVT = Op.getValueType(); + EVT OVT = N->getValueType(0); + EVT NVT = Op.getValueType(); Op = DAG.getNode(ISD::CTLZ, dl, NVT, Op); // Subtract off the extra leading bits in the bigger type. return DAG.getNode(ISD::SUB, dl, NVT, Op, @@ -290,8 +281,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTPOP(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) { SDValue Op = GetPromotedInteger(N->getOperand(0)); - MVT OVT = N->getValueType(0); - MVT NVT = Op.getValueType(); + EVT OVT = N->getValueType(0); + EVT NVT = Op.getValueType(); DebugLoc dl = N->getDebugLoc(); // The count is the same in the promoted type except if the original // value was zero. 
This can be handled by setting the bit just off @@ -303,63 +294,21 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) { } SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N) { - MVT OldVT = N->getValueType(0); - SDValue OldVec = N->getOperand(0); - if (getTypeAction(OldVec.getValueType()) == WidenVector) - OldVec = GetWidenedVector(N->getOperand(0)); - unsigned OldElts = OldVec.getValueType().getVectorNumElements(); DebugLoc dl = N->getDebugLoc(); - - if (OldElts == 1) { - assert(!isTypeLegal(OldVec.getValueType()) && - "Legal one-element vector of a type needing promotion!"); - // It is tempting to follow GetScalarizedVector by a call to - // GetPromotedInteger, but this would be wrong because the - // scalarized value may not yet have been processed. - return DAG.getNode(ISD::ANY_EXTEND, dl, TLI.getTypeToTransformTo(OldVT), - GetScalarizedVector(OldVec)); - } - - // Convert to a vector half as long with an element type of twice the width, - // for example <4 x i16> -> <2 x i32>. - assert(!(OldElts & 1) && "Odd length vectors not supported!"); - MVT NewVT = MVT::getIntegerVT(2 * OldVT.getSizeInBits()); - assert(OldVT.isSimple() && NewVT.isSimple()); - - SDValue NewVec = DAG.getNode(ISD::BIT_CONVERT, dl, - MVT::getVectorVT(NewVT, OldElts / 2), - OldVec); - - // Extract the element at OldIdx / 2 from the new vector. - SDValue OldIdx = N->getOperand(1); - SDValue NewIdx = DAG.getNode(ISD::SRL, dl, OldIdx.getValueType(), OldIdx, - DAG.getConstant(1, TLI.getPointerTy())); - SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, NewVec, NewIdx); - - // Select the appropriate half of the element: Lo if OldIdx was even, - // Hi if it was odd. - SDValue Lo = Elt; - SDValue Hi = DAG.getNode(ISD::SRL, dl, NewVT, Elt, - DAG.getConstant(OldVT.getSizeInBits(), - TLI.getPointerTy())); - if (TLI.isBigEndian()) - std::swap(Lo, Hi); - - // Extend to the promoted type. 
- SDValue Odd = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, OldIdx); - SDValue Res = DAG.getNode(ISD::SELECT, dl, NewVT, Odd, Hi, Lo); - return DAG.getNode(ISD::ANY_EXTEND, dl, TLI.getTypeToTransformTo(OldVT), Res); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NVT, N->getOperand(0), + N->getOperand(1)); } SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); unsigned NewOpc = N->getOpcode(); DebugLoc dl = N->getDebugLoc(); // If we're promoting a UINT to a larger size and the larger FP_TO_UINT is // not Legal, check to see if we can use FP_TO_SINT instead. (If both UINT // and SINT conversions are Custom, there is no way to tell which is preferable. - // We choose SINT because that's the right thing on PPC.) + // We choose SINT because that's the right thing on PPC.) if (N->getOpcode() == ISD::FP_TO_UINT && !TLI.isOperationLegal(ISD::FP_TO_UINT, NVT) && TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NVT)) @@ -376,7 +325,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) { } SDValue DAGTypeLegalizer::PromoteIntRes_INT_EXTEND(SDNode *N) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); DebugLoc dl = N->getDebugLoc(); if (getTypeAction(N->getOperand(0).getValueType()) == PromoteInteger) { @@ -403,7 +352,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_INT_EXTEND(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntRes_LOAD(LoadSDNode *N) { assert(ISD::isUNINDEXEDLoad(N) && "Indexed load during type legalization!"); - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(N) ? 
ISD::EXTLOAD : N->getExtensionType(); DebugLoc dl = N->getDebugLoc(); @@ -421,8 +370,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_LOAD(LoadSDNode *N) { /// Promote the overflow flag of an overflowing arithmetic node. SDValue DAGTypeLegalizer::PromoteIntRes_Overflow(SDNode *N) { // Simply change the return type of the boolean result. - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(1)); - MVT ValueVTs[] = { N->getValueType(0), NVT }; + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(1)); + EVT ValueVTs[] = { N->getValueType(0), NVT }; SDValue Ops[] = { N->getOperand(0), N->getOperand(1) }; SDValue Res = DAG.getNode(N->getOpcode(), N->getDebugLoc(), DAG.getVTList(ValueVTs, 2), Ops, 2); @@ -442,8 +391,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo) { // sign extension of its truncation to the original type. SDValue LHS = SExtPromotedInteger(N->getOperand(0)); SDValue RHS = SExtPromotedInteger(N->getOperand(1)); - MVT OVT = N->getOperand(0).getValueType(); - MVT NVT = LHS.getValueType(); + EVT OVT = N->getOperand(0).getValueType(); + EVT NVT = LHS.getValueType(); DebugLoc dl = N->getDebugLoc(); // Do the arithmetic in the larger type. @@ -487,7 +436,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SELECT_CC(SDNode *N) { } SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) { - MVT SVT = TLI.getSetCCResultType(N->getOperand(0).getValueType()); + EVT SVT = TLI.getSetCCResultType(N->getOperand(0).getValueType()); assert(isTypeLegal(SVT) && "Illegal SetCC type!"); DebugLoc dl = N->getDebugLoc(); @@ -496,14 +445,14 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) { N->getOperand(1), N->getOperand(2)); // Convert to the expected type. 
- MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); assert(NVT.bitsLE(SVT) && "Integer type overpromoted?"); return DAG.getNode(ISD::TRUNCATE, dl, NVT, SetCC); } SDValue DAGTypeLegalizer::PromoteIntRes_SHL(SDNode *N) { return DAG.getNode(ISD::SHL, N->getDebugLoc(), - TLI.getTypeToTransformTo(N->getValueType(0)), + TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)), GetPromotedInteger(N->getOperand(0)), N->getOperand(1)); } @@ -532,18 +481,18 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SRA(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntRes_SRL(SDNode *N) { // The input value must be properly zero extended. - MVT VT = N->getValueType(0); - MVT NVT = TLI.getTypeToTransformTo(VT); + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); SDValue Res = ZExtPromotedInteger(N->getOperand(0)); return DAG.getNode(ISD::SRL, N->getDebugLoc(), NVT, Res, N->getOperand(1)); } SDValue DAGTypeLegalizer::PromoteIntRes_TRUNCATE(SDNode *N) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Res; switch (getTypeAction(N->getOperand(0).getValueType())) { - default: assert(0 && "Unknown type action!"); + default: llvm_unreachable("Unknown type action!"); case Legal: case ExpandInteger: Res = N->getOperand(0); @@ -565,8 +514,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo) { // zero extension of its truncation to the original type. SDValue LHS = ZExtPromotedInteger(N->getOperand(0)); SDValue RHS = ZExtPromotedInteger(N->getOperand(1)); - MVT OVT = N->getOperand(0).getValueType(); - MVT NVT = LHS.getValueType(); + EVT OVT = N->getOperand(0).getValueType(); + EVT NVT = LHS.getValueType(); DebugLoc dl = N->getDebugLoc(); // Do the arithmetic in the larger type. 
@@ -594,17 +543,17 @@ SDValue DAGTypeLegalizer::PromoteIntRes_UDIV(SDNode *N) { } SDValue DAGTypeLegalizer::PromoteIntRes_UNDEF(SDNode *N) { - return DAG.getUNDEF(TLI.getTypeToTransformTo(N->getValueType(0))); + return DAG.getUNDEF(TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0))); } SDValue DAGTypeLegalizer::PromoteIntRes_VAARG(SDNode *N) { SDValue Chain = N->getOperand(0); // Get the chain. SDValue Ptr = N->getOperand(1); // Get the pointer. - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); DebugLoc dl = N->getDebugLoc(); - MVT RegVT = TLI.getRegisterType(VT); - unsigned NumRegs = TLI.getNumRegisters(VT); + EVT RegVT = TLI.getRegisterType(*DAG.getContext(), VT); + unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), VT); // The argument is passed as NumRegs registers of type RegVT. SmallVector<SDValue, 8> Parts(NumRegs); @@ -618,7 +567,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VAARG(SDNode *N) { std::reverse(Parts.begin(), Parts.end()); // Assemble the parts in the promoted type. - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Res = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Parts[0]); for (unsigned i = 1; i < NumRegs; ++i) { SDValue Part = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Parts[i]); @@ -650,7 +599,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) { /// result types of the node are known to be legal, but other operands of the /// node may need promotion or expansion as well as the specified one. 
bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { - DEBUG(cerr << "Promote integer operand: "; N->dump(&DAG); cerr << "\n"); + DEBUG(errs() << "Promote integer operand: "; N->dump(&DAG); errs() << "\n"); SDValue Res = SDValue(); if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) @@ -659,11 +608,10 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { switch (N->getOpcode()) { default: #ifndef NDEBUG - cerr << "PromoteIntegerOperand Op #" << OpNo << ": "; - N->dump(&DAG); cerr << "\n"; + errs() << "PromoteIntegerOperand Op #" << OpNo << ": "; + N->dump(&DAG); errs() << "\n"; #endif - assert(0 && "Do not know how to promote this operator's operand!"); - abort(); + llvm_unreachable("Do not know how to promote this operator's operand!"); case ISD::ANY_EXTEND: Res = PromoteIntOp_ANY_EXTEND(N); break; case ISD::BIT_CONVERT: Res = PromoteIntOp_BIT_CONVERT(N); break; @@ -719,7 +667,7 @@ void DAGTypeLegalizer::PromoteSetCCOperands(SDValue &NewLHS,SDValue &NewRHS, // insert sign extends for ALL conditions, but zero extend is cheaper on // many machines (an AND instead of two shifts), so prefer it. switch (CCCode) { - default: assert(0 && "Unknown integer comparison!"); + default: llvm_unreachable("Unknown integer comparison!"); case ISD::SETEQ: case ISD::SETNE: case ISD::SETUGE: @@ -770,7 +718,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_BRCOND(SDNode *N, unsigned OpNo) { assert(OpNo == 1 && "only know how to promote condition"); // Promote all the way up to the canonical SetCC type. - MVT SVT = TLI.getSetCCResultType(MVT::Other); + EVT SVT = TLI.getSetCCResultType(MVT::Other); SDValue Cond = PromoteTargetBoolean(N->getOperand(1), SVT); // The chain (Op#0) and basic block destination (Op#2) are always legal types. 
@@ -780,7 +728,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_BRCOND(SDNode *N, unsigned OpNo) { SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_PAIR(SDNode *N) { // Since the result type is legal, the operands must promote to it. - MVT OVT = N->getOperand(0).getValueType(); + EVT OVT = N->getOperand(0).getValueType(); SDValue Lo = ZExtPromotedInteger(N->getOperand(0)); SDValue Hi = GetPromotedInteger(N->getOperand(1)); assert(Lo.getValueType() == N->getValueType(0) && "Operand over promoted?"); @@ -795,7 +743,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_VECTOR(SDNode *N) { // The vector type is legal but the element type is not. This implies // that the vector is a power-of-two in length and that the element // type does not have a strange size (eg: it is not i1). - MVT VecVT = N->getValueType(0); + EVT VecVT = N->getValueType(0); unsigned NumElts = VecVT.getVectorNumElements(); assert(!(NumElts & 1) && "Legal vector of one illegal element?"); @@ -871,7 +819,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SELECT(SDNode *N, unsigned OpNo) { assert(OpNo == 0 && "Only know how to promote condition"); // Promote all the way up to the canonical SetCC type. - MVT SVT = TLI.getSetCCResultType(N->getOperand(1).getValueType()); + EVT SVT = TLI.getSetCCResultType(N->getOperand(1).getValueType()); SDValue Cond = PromoteTargetBoolean(N->getOperand(0), SVT); return DAG.UpdateNodeOperands(SDValue(N, 0), Cond, @@ -962,7 +910,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_ZERO_EXTEND(SDNode *N) { /// have invalid operands or may have other results that need promotion, we just /// know that (at least) one result needs expansion. 
void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { - DEBUG(cerr << "Expand integer result: "; N->dump(&DAG); cerr << "\n"); + DEBUG(errs() << "Expand integer result: "; N->dump(&DAG); errs() << "\n"); SDValue Lo, Hi; Lo = Hi = SDValue(); @@ -973,11 +921,10 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { switch (N->getOpcode()) { default: #ifndef NDEBUG - cerr << "ExpandIntegerResult #" << ResNo << ": "; - N->dump(&DAG); cerr << "\n"; + errs() << "ExpandIntegerResult #" << ResNo << ": "; + N->dump(&DAG); errs() << "\n"; #endif - assert(0 && "Do not know how to expand the result of this operator!"); - abort(); + llvm_unreachable("Do not know how to expand the result of this operator!"); case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, Lo, Hi); break; case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break; @@ -1043,10 +990,10 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt, SDValue InL, InH; GetExpandedInteger(N->getOperand(0), InL, InH); - MVT NVT = InL.getValueType(); + EVT NVT = InL.getValueType(); unsigned VTBits = N->getValueType(0).getSizeInBits(); unsigned NVTBits = NVT.getSizeInBits(); - MVT ShTy = N->getOperand(1).getValueType(); + EVT ShTy = N->getOperand(1).getValueType(); if (N->getOpcode() == ISD::SHL) { if (Amt > VTBits) { @@ -1060,7 +1007,7 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt, Hi = InL; } else if (Amt == 1 && TLI.isOperationLegalOrCustom(ISD::ADDC, - TLI.getTypeToExpandTo(NVT))) { + TLI.getTypeToExpandTo(*DAG.getContext(), NVT))) { // Emit this X << 1 as X+X. 
SDVTList VTList = DAG.getVTList(NVT, MVT::Flag); SDValue LoOps[2] = { InL, InL }; @@ -1130,8 +1077,8 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt, bool DAGTypeLegalizer:: ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Amt = N->getOperand(1); - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); - MVT ShTy = Amt.getValueType(); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + EVT ShTy = Amt.getValueType(); unsigned ShBits = ShTy.getSizeInBits(); unsigned NVTBits = NVT.getSizeInBits(); assert(isPowerOf2_32(NVTBits) && @@ -1158,7 +1105,7 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { DAG.getConstant(~HighBitMask, ShTy)); switch (N->getOpcode()) { - default: assert(0 && "Unknown shift"); + default: llvm_unreachable("Unknown shift"); case ISD::SHL: Lo = DAG.getConstant(0, NVT); // Low part is zero. Hi = DAG.getNode(ISD::SHL, dl, NVT, InL, Amt); // High part from Lo part. @@ -1186,7 +1133,7 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { Amt); unsigned Op1, Op2; switch (N->getOpcode()) { - default: assert(0 && "Unknown shift"); + default: llvm_unreachable("Unknown shift"); case ISD::SHL: Op1 = ISD::SHL; Op2 = ISD::SRL; break; case ISD::SRL: case ISD::SRA: Op1 = ISD::SRL; Op2 = ISD::SHL; break; @@ -1208,8 +1155,8 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { bool DAGTypeLegalizer:: ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Amt = N->getOperand(1); - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); - MVT ShTy = Amt.getValueType(); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + EVT ShTy = Amt.getValueType(); unsigned NVTBits = NVT.getSizeInBits(); assert(isPowerOf2_32(NVTBits) && "Expanded integer type size not a power of two!"); @@ -1226,7 +1173,7 @@ ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Lo1, 
Hi1, Lo2, Hi2; switch (N->getOpcode()) { - default: assert(0 && "Unknown shift"); + default: llvm_unreachable("Unknown shift"); case ISD::SHL: // ShAmt < NVTBits Lo1 = DAG.getConstant(0, NVT); // Low part is zero. @@ -1283,7 +1230,7 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, GetExpandedInteger(N->getOperand(0), LHSL, LHSH); GetExpandedInteger(N->getOperand(1), RHSL, RHSH); - MVT NVT = LHSL.getValueType(); + EVT NVT = LHSL.getValueType(); SDValue LoOps[2] = { LHSL, RHSL }; SDValue HiOps[3] = { LHSH, RHSH }; @@ -1295,7 +1242,7 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, bool hasCarry = TLI.isOperationLegalOrCustom(N->getOpcode() == ISD::ADD ? ISD::ADDC : ISD::SUBC, - TLI.getTypeToExpandTo(NVT)); + TLI.getTypeToExpandTo(*DAG.getContext(), NVT)); if (hasCarry) { SDVTList VTList = DAG.getVTList(NVT, MVT::Flag); @@ -1384,7 +1331,7 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUBE(SDNode *N, void DAGTypeLegalizer::ExpandIntRes_ANY_EXTEND(SDNode *N, SDValue &Lo, SDValue &Hi) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); DebugLoc dl = N->getDebugLoc(); SDValue Op = N->getOperand(0); if (Op.getValueType().bitsLE(NVT)) { @@ -1408,14 +1355,14 @@ void DAGTypeLegalizer::ExpandIntRes_AssertSext(SDNode *N, SDValue &Lo, SDValue &Hi) { DebugLoc dl = N->getDebugLoc(); GetExpandedInteger(N->getOperand(0), Lo, Hi); - MVT NVT = Lo.getValueType(); - MVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT(); + EVT NVT = Lo.getValueType(); + EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT(); unsigned NVTBits = NVT.getSizeInBits(); unsigned EVTBits = EVT.getSizeInBits(); if (NVTBits < EVTBits) { Hi = DAG.getNode(ISD::AssertSext, dl, NVT, Hi, - DAG.getValueType(MVT::getIntegerVT(EVTBits - NVTBits))); + DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), EVTBits - NVTBits))); } else { Lo = DAG.getNode(ISD::AssertSext, dl, NVT, Lo, DAG.getValueType(EVT)); // The high part replicates 
the sign bit of Lo, make it explicit. @@ -1428,14 +1375,14 @@ void DAGTypeLegalizer::ExpandIntRes_AssertZext(SDNode *N, SDValue &Lo, SDValue &Hi) { DebugLoc dl = N->getDebugLoc(); GetExpandedInteger(N->getOperand(0), Lo, Hi); - MVT NVT = Lo.getValueType(); - MVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT(); + EVT NVT = Lo.getValueType(); + EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT(); unsigned NVTBits = NVT.getSizeInBits(); unsigned EVTBits = EVT.getSizeInBits(); if (NVTBits < EVTBits) { Hi = DAG.getNode(ISD::AssertZext, dl, NVT, Hi, - DAG.getValueType(MVT::getIntegerVT(EVTBits - NVTBits))); + DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), EVTBits - NVTBits))); } else { Lo = DAG.getNode(ISD::AssertZext, dl, NVT, Lo, DAG.getValueType(EVT)); // The high part must be zero, make it explicit. @@ -1453,7 +1400,7 @@ void DAGTypeLegalizer::ExpandIntRes_BSWAP(SDNode *N, void DAGTypeLegalizer::ExpandIntRes_Constant(SDNode *N, SDValue &Lo, SDValue &Hi) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); unsigned NBitWidth = NVT.getSizeInBits(); const APInt &Cst = cast<ConstantSDNode>(N)->getAPIntValue(); Lo = DAG.getConstant(APInt(Cst).trunc(NBitWidth), NVT); @@ -1465,7 +1412,7 @@ void DAGTypeLegalizer::ExpandIntRes_CTLZ(SDNode *N, DebugLoc dl = N->getDebugLoc(); // ctlz (HiLo) -> Hi != 0 ? 
ctlz(Hi) : (ctlz(Lo)+32) GetExpandedInteger(N->getOperand(0), Lo, Hi); - MVT NVT = Lo.getValueType(); + EVT NVT = Lo.getValueType(); SDValue HiNotZero = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Hi, DAG.getConstant(0, NVT), ISD::SETNE); @@ -1484,7 +1431,7 @@ void DAGTypeLegalizer::ExpandIntRes_CTPOP(SDNode *N, DebugLoc dl = N->getDebugLoc(); // ctpop(HiLo) -> ctpop(Hi)+ctpop(Lo) GetExpandedInteger(N->getOperand(0), Lo, Hi); - MVT NVT = Lo.getValueType(); + EVT NVT = Lo.getValueType(); Lo = DAG.getNode(ISD::ADD, dl, NVT, DAG.getNode(ISD::CTPOP, dl, NVT, Lo), DAG.getNode(ISD::CTPOP, dl, NVT, Hi)); Hi = DAG.getConstant(0, NVT); @@ -1495,7 +1442,7 @@ void DAGTypeLegalizer::ExpandIntRes_CTTZ(SDNode *N, DebugLoc dl = N->getDebugLoc(); // cttz (HiLo) -> Lo != 0 ? cttz(Lo) : (cttz(Hi)+32) GetExpandedInteger(N->getOperand(0), Lo, Hi); - MVT NVT = Lo.getValueType(); + EVT NVT = Lo.getValueType(); SDValue LoNotZero = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo, DAG.getConstant(0, NVT), ISD::SETNE); @@ -1512,7 +1459,7 @@ void DAGTypeLegalizer::ExpandIntRes_CTTZ(SDNode *N, void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo, SDValue &Hi) { DebugLoc dl = N->getDebugLoc(); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); SDValue Op = N->getOperand(0); RTLIB::Libcall LC = RTLIB::getFPTOSINT(Op.getValueType(), VT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-sint conversion!"); @@ -1522,7 +1469,7 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo, void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo, SDValue &Hi) { DebugLoc dl = N->getDebugLoc(); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); SDValue Op = N->getOperand(0); RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), VT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-uint conversion!"); @@ -1538,8 +1485,8 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, assert(ISD::isUNINDEXEDLoad(N) && 
"Indexed load during type legalization!"); - MVT VT = N->getValueType(0); - MVT NVT = TLI.getTypeToTransformTo(VT); + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); SDValue Ch = N->getChain(); SDValue Ptr = N->getBasePtr(); ISD::LoadExtType ExtType = N->getExtensionType(); @@ -1551,10 +1498,10 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, assert(NVT.isByteSized() && "Expanded type not byte sized!"); if (N->getMemoryVT().bitsLE(NVT)) { - MVT EVT = N->getMemoryVT(); + EVT MemVT = N->getMemoryVT(); Lo = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getSrcValue(), SVOffset, - EVT, isVolatile, Alignment); + MemVT, isVolatile, Alignment); // Remember the chain. Ch = Lo.getValue(1); @@ -1580,7 +1527,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, unsigned ExcessBits = N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits(); - MVT NEVT = MVT::getIntegerVT(ExcessBits); + EVT NEVT = EVT::getIntegerVT(*DAG.getContext(), ExcessBits); // Increment the pointer to the other half. unsigned IncrementSize = NVT.getSizeInBits()/8; @@ -1597,14 +1544,15 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, } else { // Big-endian - high bits are at low addresses. Favor aligned loads at // the cost of some bit-fiddling. - MVT EVT = N->getMemoryVT(); - unsigned EBytes = EVT.getStoreSizeInBits()/8; + EVT MemVT = N->getMemoryVT(); + unsigned EBytes = MemVT.getStoreSize(); unsigned IncrementSize = NVT.getSizeInBits()/8; unsigned ExcessBits = (EBytes - IncrementSize)*8; // Load both the high bits and maybe some of the low bits. Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getSrcValue(), SVOffset, - MVT::getIntegerVT(EVT.getSizeInBits() - ExcessBits), + EVT::getIntegerVT(*DAG.getContext(), + MemVT.getSizeInBits() - ExcessBits), isVolatile, Alignment); // Increment the pointer to the other half. @@ -1613,7 +1561,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, // Load the rest of the low bits. 
Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, NVT, Ch, Ptr, N->getSrcValue(), SVOffset+IncrementSize, - MVT::getIntegerVT(ExcessBits), + EVT::getIntegerVT(*DAG.getContext(), ExcessBits), isVolatile, MinAlign(Alignment, IncrementSize)); // Build a factor node to remember that this load is independent of the @@ -1652,8 +1600,8 @@ void DAGTypeLegalizer::ExpandIntRes_Logical(SDNode *N, void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N, SDValue &Lo, SDValue &Hi) { - MVT VT = N->getValueType(0); - MVT NVT = TLI.getTypeToTransformTo(VT); + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); DebugLoc dl = N->getDebugLoc(); bool HasMULHS = TLI.isOperationLegalOrCustom(ISD::MULHS, NVT); @@ -1742,7 +1690,7 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N, void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N, SDValue &Lo, SDValue &Hi) { - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); DebugLoc dl = N->getDebugLoc(); RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; @@ -1762,7 +1710,7 @@ void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N, void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, SDValue &Lo, SDValue &Hi) { - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); DebugLoc dl = N->getDebugLoc(); // If we can emit an efficient shift operation, do so now. Check to see if @@ -1788,7 +1736,7 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, // Next check to see if the target supports this SHL_PARTS operation or if it // will custom expand it. 
- MVT NVT = TLI.getTypeToTransformTo(VT); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); TargetLowering::LegalizeAction Action = TLI.getOperationAction(PartsOpc, NVT); if ((Action == TargetLowering::Legal && TLI.isTypeLegal(NVT)) || Action == TargetLowering::Custom) { @@ -1797,7 +1745,7 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, GetExpandedInteger(N->getOperand(0), LHSL, LHSH); SDValue Ops[] = { LHSL, LHSH, N->getOperand(1) }; - MVT VT = LHSL.getValueType(); + EVT VT = LHSL.getValueType(); Lo = DAG.getNode(PartsOpc, dl, DAG.getVTList(VT, VT), Ops, 3); Hi = Lo.getValue(1); return; @@ -1838,7 +1786,7 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, else if (VT == MVT::i128) LC = RTLIB::SRA_I128; } - + if (LC != RTLIB::UNKNOWN_LIBCALL && TLI.getLibcallName(LC)) { SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; SplitInteger(MakeLibCall(LC, VT, Ops, 2, isSigned, dl), Lo, Hi); @@ -1846,12 +1794,12 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, } if (!ExpandShiftWithUnknownAmountBit(N, Lo, Hi)) - assert(0 && "Unsupported shift!"); + llvm_unreachable("Unsupported shift!"); } void DAGTypeLegalizer::ExpandIntRes_SIGN_EXTEND(SDNode *N, SDValue &Lo, SDValue &Hi) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); DebugLoc dl = N->getDebugLoc(); SDValue Op = N->getOperand(0); if (Op.getValueType().bitsLE(NVT)) { @@ -1874,7 +1822,7 @@ void DAGTypeLegalizer::ExpandIntRes_SIGN_EXTEND(SDNode *N, unsigned ExcessBits = Op.getValueType().getSizeInBits() - NVT.getSizeInBits(); Hi = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Hi.getValueType(), Hi, - DAG.getValueType(MVT::getIntegerVT(ExcessBits))); + DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), ExcessBits))); } } @@ -1882,7 +1830,7 @@ void DAGTypeLegalizer:: ExpandIntRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo, SDValue &Hi) { DebugLoc dl = N->getDebugLoc(); GetExpandedInteger(N->getOperand(0), 
Lo, Hi); - MVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT(); + EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT(); if (EVT.bitsLE(Lo.getValueType())) { // sext_inreg the low part if needed. @@ -1900,13 +1848,13 @@ ExpandIntRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo, SDValue &Hi) { unsigned ExcessBits = EVT.getSizeInBits() - Lo.getValueType().getSizeInBits(); Hi = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Hi.getValueType(), Hi, - DAG.getValueType(MVT::getIntegerVT(ExcessBits))); + DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), ExcessBits))); } } void DAGTypeLegalizer::ExpandIntRes_SREM(SDNode *N, SDValue &Lo, SDValue &Hi) { - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); DebugLoc dl = N->getDebugLoc(); RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; @@ -1926,7 +1874,7 @@ void DAGTypeLegalizer::ExpandIntRes_SREM(SDNode *N, void DAGTypeLegalizer::ExpandIntRes_TRUNCATE(SDNode *N, SDValue &Lo, SDValue &Hi) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); DebugLoc dl = N->getDebugLoc(); Lo = DAG.getNode(ISD::TRUNCATE, dl, NVT, N->getOperand(0)); Hi = DAG.getNode(ISD::SRL, dl, @@ -1937,7 +1885,7 @@ void DAGTypeLegalizer::ExpandIntRes_TRUNCATE(SDNode *N, void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N, SDValue &Lo, SDValue &Hi) { - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); DebugLoc dl = N->getDebugLoc(); RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; @@ -1957,7 +1905,7 @@ void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N, void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N, SDValue &Lo, SDValue &Hi) { - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); DebugLoc dl = N->getDebugLoc(); RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; @@ -1977,7 +1925,7 @@ void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N, void DAGTypeLegalizer::ExpandIntRes_ZERO_EXTEND(SDNode *N, SDValue &Lo, SDValue &Hi) { - MVT NVT = 
TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); DebugLoc dl = N->getDebugLoc(); SDValue Op = N->getOperand(0); if (Op.getValueType().bitsLE(NVT)) { @@ -1996,7 +1944,7 @@ void DAGTypeLegalizer::ExpandIntRes_ZERO_EXTEND(SDNode *N, SplitInteger(Res, Lo, Hi); unsigned ExcessBits = Op.getValueType().getSizeInBits() - NVT.getSizeInBits(); - Hi = DAG.getZeroExtendInReg(Hi, dl, MVT::getIntegerVT(ExcessBits)); + Hi = DAG.getZeroExtendInReg(Hi, dl, EVT::getIntegerVT(*DAG.getContext(), ExcessBits)); } } @@ -2010,7 +1958,7 @@ void DAGTypeLegalizer::ExpandIntRes_ZERO_EXTEND(SDNode *N, /// result types of the node are known to be legal, but other operands of the /// node may need promotion or expansion as well as the specified one. bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) { - DEBUG(cerr << "Expand integer operand: "; N->dump(&DAG); cerr << "\n"); + DEBUG(errs() << "Expand integer operand: "; N->dump(&DAG); errs() << "\n"); SDValue Res = SDValue(); if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) @@ -2019,11 +1967,10 @@ bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) { switch (N->getOpcode()) { default: #ifndef NDEBUG - cerr << "ExpandIntegerOperand Op #" << OpNo << ": "; - N->dump(&DAG); cerr << "\n"; + errs() << "ExpandIntegerOperand Op #" << OpNo << ": "; + N->dump(&DAG); errs() << "\n"; #endif - assert(0 && "Do not know how to expand this operator's operand!"); - abort(); + llvm_unreachable("Do not know how to expand this operator's operand!"); case ISD::BIT_CONVERT: Res = ExpandOp_BIT_CONVERT(N); break; case ISD::BR_CC: Res = ExpandIntOp_BR_CC(N); break; @@ -2070,7 +2017,7 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS, GetExpandedInteger(NewLHS, LHSLo, LHSHi); GetExpandedInteger(NewRHS, RHSLo, RHSHi); - MVT VT = NewLHS.getValueType(); + EVT VT = NewLHS.getValueType(); if (CCCode == ISD::SETEQ || CCCode == 
ISD::SETNE) { if (RHSLo == RHSHi) { @@ -2105,7 +2052,7 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS, // FIXME: This generated code sucks. ISD::CondCode LowCC; switch (CCCode) { - default: assert(0 && "Unknown integer setcc!"); + default: llvm_unreachable("Unknown integer setcc!"); case ISD::SETLT: case ISD::SETULT: LowCC = ISD::SETULT; break; case ISD::SETGT: @@ -2122,7 +2069,7 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS, // NOTE: on targets without efficient SELECT of bools, we can always use // this identity: (B1 ? B2 : B3) --> (B1 & B2)|(!B1&B3) - TargetLowering::DAGCombinerInfo DagCombineInfo(DAG, false, true, NULL); + TargetLowering::DAGCombinerInfo DagCombineInfo(DAG, false, true, true, NULL); SDValue Tmp1, Tmp2; Tmp1 = TLI.SimplifySetCC(TLI.getSetCCResultType(LHSLo.getValueType()), LHSLo, RHSLo, LowCC, false, DagCombineInfo, dl); @@ -2228,7 +2175,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_Shift(SDNode *N) { SDValue DAGTypeLegalizer::ExpandIntOp_SINT_TO_FP(SDNode *N) { SDValue Op = N->getOperand(0); - MVT DstVT = N->getValueType(0); + EVT DstVT = N->getValueType(0); RTLIB::Libcall LC = RTLIB::getSINTTOFP(Op.getValueType(), DstVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Don't know how to expand this SINT_TO_FP!"); @@ -2242,8 +2189,8 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!"); assert(OpNo == 1 && "Can only expand the stored value so far"); - MVT VT = N->getOperand(1).getValueType(); - MVT NVT = TLI.getTypeToTransformTo(VT); + EVT VT = N->getOperand(1).getValueType(); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); SDValue Ch = N->getChain(); SDValue Ptr = N->getBasePtr(); int SVOffset = N->getSrcValueOffset(); @@ -2267,7 +2214,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { unsigned ExcessBits = N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits(); - 
MVT NEVT = MVT::getIntegerVT(ExcessBits); + EVT NEVT = EVT::getIntegerVT(*DAG.getContext(), ExcessBits); // Increment the pointer to the other half. unsigned IncrementSize = NVT.getSizeInBits()/8; @@ -2282,11 +2229,11 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { // the cost of some bit-fiddling. GetExpandedInteger(N->getValue(), Lo, Hi); - MVT EVT = N->getMemoryVT(); - unsigned EBytes = EVT.getStoreSizeInBits()/8; + EVT ExtVT = N->getMemoryVT(); + unsigned EBytes = ExtVT.getStoreSize(); unsigned IncrementSize = NVT.getSizeInBits()/8; unsigned ExcessBits = (EBytes - IncrementSize)*8; - MVT HiVT = MVT::getIntegerVT(EVT.getSizeInBits() - ExcessBits); + EVT HiVT = EVT::getIntegerVT(*DAG.getContext(), ExtVT.getSizeInBits() - ExcessBits); if (ExcessBits < NVT.getSizeInBits()) { // Transfer high bits from the top of Lo to the bottom of Hi. @@ -2309,7 +2256,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { // Store the lowest ExcessBits bits in the second half. Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset+IncrementSize, - MVT::getIntegerVT(ExcessBits), + EVT::getIntegerVT(*DAG.getContext(), ExcessBits), isVolatile, MinAlign(Alignment, IncrementSize)); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); } @@ -2324,8 +2271,8 @@ SDValue DAGTypeLegalizer::ExpandIntOp_TRUNCATE(SDNode *N) { SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { SDValue Op = N->getOperand(0); - MVT SrcVT = Op.getValueType(); - MVT DstVT = N->getValueType(0); + EVT SrcVT = Op.getValueType(); + EVT DstVT = N->getValueType(0); DebugLoc dl = N->getDebugLoc(); if (TLI.getOperationAction(ISD::SINT_TO_FP, SrcVT) == TargetLowering::Custom){ @@ -2360,7 +2307,8 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { ISD::SETLT); // Build a 64 bit pair (0, FF) in the constant pool, with FF in the lo bits. 
- SDValue FudgePtr = DAG.getConstantPool(ConstantInt::get(FF.zext(64)), + SDValue FudgePtr = DAG.getConstantPool( + ConstantInt::get(*DAG.getContext(), FF.zext(64)), TLI.getPointerTy()); // Get a pointer to FF if the sign bit was set, or to 0 otherwise. diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index 3135a44..5992f5d 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -15,9 +15,11 @@ #include "LegalizeTypes.h" #include "llvm/CallingConv.h" +#include "llvm/Target/TargetData.h" #include "llvm/ADT/SetVector.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Target/TargetData.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; static cl::opt<bool> @@ -113,43 +115,43 @@ void DAGTypeLegalizer::PerformExpensiveChecks() { if (I->getNodeId() != Processed) { if (Mapped != 0) { - cerr << "Unprocessed value in a map!"; + errs() << "Unprocessed value in a map!"; Failed = true; } } else if (isTypeLegal(Res.getValueType()) || IgnoreNodeResults(I)) { if (Mapped > 1) { - cerr << "Value with legal type was transformed!"; + errs() << "Value with legal type was transformed!"; Failed = true; } } else { if (Mapped == 0) { - cerr << "Processed value not in any map!"; + errs() << "Processed value not in any map!"; Failed = true; } else if (Mapped & (Mapped - 1)) { - cerr << "Value in multiple maps!"; + errs() << "Value in multiple maps!"; Failed = true; } } if (Failed) { if (Mapped & 1) - cerr << " ReplacedValues"; + errs() << " ReplacedValues"; if (Mapped & 2) - cerr << " PromotedIntegers"; + errs() << " PromotedIntegers"; if (Mapped & 4) - cerr << " SoftenedFloats"; + errs() << " SoftenedFloats"; if (Mapped & 8) - cerr << " ScalarizedVectors"; + errs() << " ScalarizedVectors"; if (Mapped & 16) - cerr << " ExpandedIntegers"; + errs() << " ExpandedIntegers"; if (Mapped & 32) - cerr << " ExpandedFloats"; + errs() << " 
ExpandedFloats"; if (Mapped & 64) - cerr << " SplitVectors"; + errs() << " SplitVectors"; if (Mapped & 128) - cerr << " WidenedVectors"; - cerr << "\n"; - abort(); + errs() << " WidenedVectors"; + errs() << "\n"; + llvm_unreachable(0); } } } @@ -210,7 +212,7 @@ bool DAGTypeLegalizer::run() { // Scan the values produced by the node, checking to see if any result // types are illegal. for (unsigned i = 0, NumResults = N->getNumValues(); i < NumResults; ++i) { - MVT ResultVT = N->getValueType(i); + EVT ResultVT = N->getValueType(i); switch (getTypeAction(ResultVT)) { default: assert(false && "Unknown action!"); @@ -263,7 +265,7 @@ ScanOperands: if (IgnoreNodeResults(N->getOperand(i).getNode())) continue; - MVT OpVT = N->getOperand(i).getValueType(); + EVT OpVT = N->getOperand(i).getValueType(); switch (getTypeAction(OpVT)) { default: assert(false && "Unknown action!"); @@ -336,7 +338,7 @@ ScanOperands: } if (i == NumOperands) { - DEBUG(cerr << "Legally typed node: "; N->dump(&DAG); cerr << "\n"); + DEBUG(errs() << "Legally typed node: "; N->dump(&DAG); errs() << "\n"); } } NodeDone: @@ -405,7 +407,7 @@ NodeDone: if (!IgnoreNodeResults(I)) for (unsigned i = 0, NumVals = I->getNumValues(); i < NumVals; ++i) if (!isTypeLegal(I->getValueType(i))) { - cerr << "Result type " << i << " illegal!\n"; + errs() << "Result type " << i << " illegal!\n"; Failed = true; } @@ -413,25 +415,25 @@ NodeDone: for (unsigned i = 0, NumOps = I->getNumOperands(); i < NumOps; ++i) if (!IgnoreNodeResults(I->getOperand(i).getNode()) && !isTypeLegal(I->getOperand(i).getValueType())) { - cerr << "Operand type " << i << " illegal!\n"; + errs() << "Operand type " << i << " illegal!\n"; Failed = true; } if (I->getNodeId() != Processed) { if (I->getNodeId() == NewNode) - cerr << "New node not analyzed?\n"; + errs() << "New node not analyzed?\n"; else if (I->getNodeId() == Unanalyzed) - cerr << "Unanalyzed node not noticed?\n"; + errs() << "Unanalyzed node not noticed?\n"; else if (I->getNodeId() > 0) 
- cerr << "Operand not processed?\n"; + errs() << "Operand not processed?\n"; else if (I->getNodeId() == ReadyToProcess) - cerr << "Not added to worklist?\n"; + errs() << "Not added to worklist?\n"; Failed = true; } if (Failed) { - I->dump(&DAG); cerr << "\n"; - abort(); + I->dump(&DAG); errs() << "\n"; + llvm_unreachable(0); } } #endif @@ -479,8 +481,7 @@ SDNode *DAGTypeLegalizer::AnalyzeNewNode(SDNode *N) { NewOps.push_back(Op); } else if (Op != OrigOp) { // This is the first operand to change - add all operands so far. - for (unsigned j = 0; j < i; ++j) - NewOps.push_back(N->getOperand(j)); + NewOps.insert(NewOps.end(), N->op_begin(), N->op_begin() + i); NewOps.push_back(Op); } } @@ -732,6 +733,8 @@ void DAGTypeLegalizer::ReplaceValueWith(SDValue From, SDValue To) { } void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) { + assert(Result.getValueType() == TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) && + "Invalid type for promoted integer"); AnalyzeNewValue(Result); SDValue &OpEntry = PromotedIntegers[Op]; @@ -740,6 +743,8 @@ void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) { } void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) { + assert(Result.getValueType() == TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) && + "Invalid type for softened float"); AnalyzeNewValue(Result); SDValue &OpEntry = SoftenedFloats[Op]; @@ -748,6 +753,8 @@ void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) { } void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) { + assert(Result.getValueType() == Op.getValueType().getVectorElementType() && + "Invalid type for scalarized vector"); AnalyzeNewValue(Result); SDValue &OpEntry = ScalarizedVectors[Op]; @@ -767,6 +774,9 @@ void DAGTypeLegalizer::GetExpandedInteger(SDValue Op, SDValue &Lo, void DAGTypeLegalizer::SetExpandedInteger(SDValue Op, SDValue Lo, SDValue Hi) { + assert(Lo.getValueType() == 
TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) && + Hi.getValueType() == Lo.getValueType() && + "Invalid type for expanded integer"); // Lo/Hi may have been newly allocated, if so, add nodeid's as relevant. AnalyzeNewValue(Lo); AnalyzeNewValue(Hi); @@ -790,6 +800,9 @@ void DAGTypeLegalizer::GetExpandedFloat(SDValue Op, SDValue &Lo, void DAGTypeLegalizer::SetExpandedFloat(SDValue Op, SDValue Lo, SDValue Hi) { + assert(Lo.getValueType() == TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) && + Hi.getValueType() == Lo.getValueType() && + "Invalid type for expanded float"); // Lo/Hi may have been newly allocated, if so, add nodeid's as relevant. AnalyzeNewValue(Lo); AnalyzeNewValue(Hi); @@ -813,6 +826,12 @@ void DAGTypeLegalizer::GetSplitVector(SDValue Op, SDValue &Lo, void DAGTypeLegalizer::SetSplitVector(SDValue Op, SDValue Lo, SDValue Hi) { + assert(Lo.getValueType().getVectorElementType() == + Op.getValueType().getVectorElementType() && + 2*Lo.getValueType().getVectorNumElements() == + Op.getValueType().getVectorNumElements() && + Hi.getValueType() == Lo.getValueType() && + "Invalid type for split vector"); // Lo/Hi may have been newly allocated, if so, add nodeid's as relevant. 
AnalyzeNewValue(Lo); AnalyzeNewValue(Hi); @@ -825,6 +844,8 @@ void DAGTypeLegalizer::SetSplitVector(SDValue Op, SDValue Lo, } void DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue Result) { + assert(Result.getValueType() == TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) && + "Invalid type for widened vector"); AnalyzeNewValue(Result); SDValue &OpEntry = WidenedVectors[Op]; @@ -841,7 +862,7 @@ void DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue Result) { SDValue DAGTypeLegalizer::BitConvertToInteger(SDValue Op) { unsigned BitWidth = Op.getValueType().getSizeInBits(); return DAG.getNode(ISD::BIT_CONVERT, Op.getDebugLoc(), - MVT::getIntegerVT(BitWidth), Op); + EVT::getIntegerVT(*DAG.getContext(), BitWidth), Op); } /// BitConvertVectorToIntegerVector - Convert to a vector of integers of the @@ -849,14 +870,14 @@ SDValue DAGTypeLegalizer::BitConvertToInteger(SDValue Op) { SDValue DAGTypeLegalizer::BitConvertVectorToIntegerVector(SDValue Op) { assert(Op.getValueType().isVector() && "Only applies to vectors!"); unsigned EltWidth = Op.getValueType().getVectorElementType().getSizeInBits(); - MVT EltNVT = MVT::getIntegerVT(EltWidth); + EVT EltNVT = EVT::getIntegerVT(*DAG.getContext(), EltWidth); unsigned NumElts = Op.getValueType().getVectorNumElements(); return DAG.getNode(ISD::BIT_CONVERT, Op.getDebugLoc(), - MVT::getVectorVT(EltNVT, NumElts), Op); + EVT::getVectorVT(*DAG.getContext(), EltNVT, NumElts), Op); } SDValue DAGTypeLegalizer::CreateStackStoreLoad(SDValue Op, - MVT DestVT) { + EVT DestVT) { DebugLoc dl = Op.getDebugLoc(); // Create the stack frame object. Make sure it is aligned for both // the source and destination types. @@ -875,7 +896,7 @@ SDValue DAGTypeLegalizer::CreateStackStoreLoad(SDValue Op, /// The last parameter being TRUE means we are dealing with a /// node with illegal result types. The second parameter denotes the type of /// illegal ResNo in that case. 
-bool DAGTypeLegalizer::CustomLowerNode(SDNode *N, MVT VT, bool LegalizeResult) { +bool DAGTypeLegalizer::CustomLowerNode(SDNode *N, EVT VT, bool LegalizeResult) { // See if the target wants to custom lower this node. if (TLI.getOperationAction(N->getOpcode(), VT) != TargetLowering::Custom) return false; @@ -900,21 +921,14 @@ bool DAGTypeLegalizer::CustomLowerNode(SDNode *N, MVT VT, bool LegalizeResult) { /// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type /// which is split into two not necessarily identical pieces. -void DAGTypeLegalizer::GetSplitDestVTs(MVT InVT, MVT &LoVT, MVT &HiVT) { +void DAGTypeLegalizer::GetSplitDestVTs(EVT InVT, EVT &LoVT, EVT &HiVT) { + // Currently all types are split in half. if (!InVT.isVector()) { - LoVT = HiVT = TLI.getTypeToTransformTo(InVT); + LoVT = HiVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT); } else { - MVT NewEltVT = InVT.getVectorElementType(); unsigned NumElements = InVT.getVectorNumElements(); - if ((NumElements & (NumElements-1)) == 0) { // Simple power of two vector. - NumElements >>= 1; - LoVT = HiVT = MVT::getVectorVT(NewEltVT, NumElements); - } else { // Non-power-of-two vectors. 
- unsigned NewNumElts_Lo = 1 << Log2_32(NumElements); - unsigned NewNumElts_Hi = NumElements - NewNumElts_Lo; - LoVT = MVT::getVectorVT(NewEltVT, NewNumElts_Lo); - HiVT = MVT::getVectorVT(NewEltVT, NewNumElts_Hi); - } + assert(!(NumElements & 1) && "Splitting vector, but not in half!"); + LoVT = HiVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), NumElements/2); } } @@ -923,14 +937,14 @@ void DAGTypeLegalizer::GetSplitDestVTs(MVT InVT, MVT &LoVT, MVT &HiVT) { void DAGTypeLegalizer::GetPairElements(SDValue Pair, SDValue &Lo, SDValue &Hi) { DebugLoc dl = Pair.getDebugLoc(); - MVT NVT = TLI.getTypeToTransformTo(Pair.getValueType()); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), Pair.getValueType()); Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, NVT, Pair, DAG.getIntPtrConstant(0)); Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, NVT, Pair, DAG.getIntPtrConstant(1)); } -SDValue DAGTypeLegalizer::GetVectorElementPointer(SDValue VecPtr, MVT EltVT, +SDValue DAGTypeLegalizer::GetVectorElementPointer(SDValue VecPtr, EVT EltVT, SDValue Index) { DebugLoc dl = Index.getDebugLoc(); // Make sure the index type is big enough to compute in. @@ -952,9 +966,9 @@ SDValue DAGTypeLegalizer::JoinIntegers(SDValue Lo, SDValue Hi) { // Arbitrarily use dlHi for result DebugLoc DebugLoc dlHi = Hi.getDebugLoc(); DebugLoc dlLo = Lo.getDebugLoc(); - MVT LVT = Lo.getValueType(); - MVT HVT = Hi.getValueType(); - MVT NVT = MVT::getIntegerVT(LVT.getSizeInBits() + HVT.getSizeInBits()); + EVT LVT = Lo.getValueType(); + EVT HVT = Hi.getValueType(); + EVT NVT = EVT::getIntegerVT(*DAG.getContext(), LVT.getSizeInBits() + HVT.getSizeInBits()); Lo = DAG.getNode(ISD::ZERO_EXTEND, dlLo, NVT, Lo); Hi = DAG.getNode(ISD::ANY_EXTEND, dlHi, NVT, Hi); @@ -986,7 +1000,7 @@ SDValue DAGTypeLegalizer::LibCallify(RTLIB::Libcall LC, SDNode *N, /// MakeLibCall - Generate a libcall taking the given operands as arguments and /// returning a result of type RetVT. 
-SDValue DAGTypeLegalizer::MakeLibCall(RTLIB::Libcall LC, MVT RetVT, +SDValue DAGTypeLegalizer::MakeLibCall(RTLIB::Libcall LC, EVT RetVT, const SDValue *Ops, unsigned NumOps, bool isSigned, DebugLoc dl) { TargetLowering::ArgListTy Args; @@ -995,7 +1009,7 @@ SDValue DAGTypeLegalizer::MakeLibCall(RTLIB::Libcall LC, MVT RetVT, TargetLowering::ArgListEntry Entry; for (unsigned i = 0; i != NumOps; ++i) { Entry.Node = Ops[i]; - Entry.Ty = Entry.Node.getValueType().getTypeForMVT(); + Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext()); Entry.isSExt = isSigned; Entry.isZExt = !isSigned; Args.push_back(Entry); @@ -1003,17 +1017,19 @@ SDValue DAGTypeLegalizer::MakeLibCall(RTLIB::Libcall LC, MVT RetVT, SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), TLI.getPointerTy()); - const Type *RetTy = RetVT.getTypeForMVT(); + const Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); std::pair<SDValue,SDValue> CallInfo = TLI.LowerCallTo(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false, - false, 0, CallingConv::C, false, Callee, Args, DAG, dl); + false, 0, TLI.getLibcallCallingConv(LC), false, + /*isReturnValueUsed=*/true, + Callee, Args, DAG, dl); return CallInfo.first; } /// PromoteTargetBoolean - Promote the given target boolean to a target boolean /// of the given type. A target boolean is an integer value, not necessarily of /// type i1, the bits of which conform to getBooleanContents. -SDValue DAGTypeLegalizer::PromoteTargetBoolean(SDValue Bool, MVT VT) { +SDValue DAGTypeLegalizer::PromoteTargetBoolean(SDValue Bool, EVT VT) { DebugLoc dl = Bool.getDebugLoc(); ISD::NodeType ExtendCode; switch (TLI.getBooleanContents()) { @@ -1039,7 +1055,7 @@ SDValue DAGTypeLegalizer::PromoteTargetBoolean(SDValue Bool, MVT VT) { /// SplitInteger - Return the lower LoVT bits of Op in Lo and the upper HiVT /// bits in Hi. 
void DAGTypeLegalizer::SplitInteger(SDValue Op, - MVT LoVT, MVT HiVT, + EVT LoVT, EVT HiVT, SDValue &Lo, SDValue &Hi) { DebugLoc dl = Op.getDebugLoc(); assert(LoVT.getSizeInBits() + HiVT.getSizeInBits() == @@ -1054,7 +1070,7 @@ void DAGTypeLegalizer::SplitInteger(SDValue Op, /// type half the size of Op's. void DAGTypeLegalizer::SplitInteger(SDValue Op, SDValue &Lo, SDValue &Hi) { - MVT HalfVT = MVT::getIntegerVT(Op.getValueType().getSizeInBits()/2); + EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Op.getValueType().getSizeInBits()/2); SplitInteger(Op, HalfVT, HalfVT, Lo, Hi); } diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 02b0732..859c656 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -64,7 +64,7 @@ private: SoftenFloat, // Convert this float type to a same size integer type. ExpandFloat, // Split this float type into two of half the size. ScalarizeVector, // Replace this one-element vector with its element type. - SplitVector, // This vector type should be split into smaller vectors. + SplitVector, // Split this vector type into two of half the size. WidenVector // This vector type should be widened into a larger vector. }; @@ -74,8 +74,8 @@ private: TargetLowering::ValueTypeActionImpl ValueTypeActions; /// getTypeAction - Return how we should legalize values of this type. 
- LegalizeAction getTypeAction(MVT VT) const { - switch (ValueTypeActions.getTypeAction(VT)) { + LegalizeAction getTypeAction(EVT VT) const { + switch (ValueTypeActions.getTypeAction(*DAG.getContext(), VT)) { default: assert(false && "Unknown legalize action!"); case TargetLowering::Legal: @@ -96,7 +96,7 @@ private: if (VT.isInteger()) return ExpandInteger; else if (VT.getSizeInBits() == - TLI.getTypeToTransformTo(VT).getSizeInBits()) + TLI.getTypeToTransformTo(*DAG.getContext(), VT).getSizeInBits()) return SoftenFloat; else return ExpandFloat; @@ -109,8 +109,9 @@ private: } /// isTypeLegal - Return true if this type is legal on this target. - bool isTypeLegal(MVT VT) const { - return ValueTypeActions.getTypeAction(VT) == TargetLowering::Legal; + bool isTypeLegal(EVT VT) const { + return (ValueTypeActions.getTypeAction(*DAG.getContext(), VT) == + TargetLowering::Legal); } /// IgnoreNodeResults - Pretend all of this node's results are legal. @@ -185,19 +186,19 @@ private: // Common routines. 
SDValue BitConvertToInteger(SDValue Op); SDValue BitConvertVectorToIntegerVector(SDValue Op); - SDValue CreateStackStoreLoad(SDValue Op, MVT DestVT); - bool CustomLowerNode(SDNode *N, MVT VT, bool LegalizeResult); - SDValue GetVectorElementPointer(SDValue VecPtr, MVT EltVT, SDValue Index); + SDValue CreateStackStoreLoad(SDValue Op, EVT DestVT); + bool CustomLowerNode(SDNode *N, EVT VT, bool LegalizeResult); + SDValue GetVectorElementPointer(SDValue VecPtr, EVT EltVT, SDValue Index); SDValue JoinIntegers(SDValue Lo, SDValue Hi); SDValue LibCallify(RTLIB::Libcall LC, SDNode *N, bool isSigned); - SDValue MakeLibCall(RTLIB::Libcall LC, MVT RetVT, + SDValue MakeLibCall(RTLIB::Libcall LC, EVT RetVT, const SDValue *Ops, unsigned NumOps, bool isSigned, DebugLoc dl); - SDValue PromoteTargetBoolean(SDValue Bool, MVT VT); + SDValue PromoteTargetBoolean(SDValue Bool, EVT VT); void ReplaceValueWith(SDValue From, SDValue To); void ReplaceValueWithHelper(SDValue From, SDValue To); void SplitInteger(SDValue Op, SDValue &Lo, SDValue &Hi); - void SplitInteger(SDValue Op, MVT LoVT, MVT HiVT, + void SplitInteger(SDValue Op, EVT LoVT, EVT HiVT, SDValue &Lo, SDValue &Hi); //===--------------------------------------------------------------------===// @@ -224,7 +225,7 @@ private: /// SExtPromotedInteger - Get a promoted operand and sign extend it to the /// final size. SDValue SExtPromotedInteger(SDValue Op) { - MVT OldVT = Op.getValueType(); + EVT OldVT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); Op = GetPromotedInteger(Op); return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Op.getValueType(), Op, @@ -234,7 +235,7 @@ private: /// ZExtPromotedInteger - Get a promoted operand and zero extend it to the /// final size. 
SDValue ZExtPromotedInteger(SDValue Op) { - MVT OldVT = Op.getValueType(); + EVT OldVT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); Op = GetPromotedInteger(Op); return DAG.getZeroExtendInReg(Op, dl, OldVT); @@ -506,7 +507,6 @@ private: // Vector Result Scalarization: <1 x ty> -> ty. void ScalarizeVectorResult(SDNode *N, unsigned OpNo); SDValue ScalarizeVecRes_BinOp(SDNode *N); - SDValue ScalarizeVecRes_ShiftOp(SDNode *N); SDValue ScalarizeVecRes_UnaryOp(SDNode *N); SDValue ScalarizeVecRes_BIT_CONVERT(SDNode *N); @@ -518,6 +518,7 @@ private: SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N); SDValue ScalarizeVecRes_SELECT(SDNode *N); SDValue ScalarizeVecRes_SELECT_CC(SDNode *N); + SDValue ScalarizeVecRes_SETCC(SDNode *N); SDValue ScalarizeVecRes_UNDEF(SDNode *N); SDValue ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N); SDValue ScalarizeVecRes_VSETCC(SDNode *N); @@ -533,8 +534,8 @@ private: // Vector Splitting Support: LegalizeVectorTypes.cpp //===--------------------------------------------------------------------===// - /// GetSplitVector - Given a processed vector Op which was split into smaller - /// vectors, this method returns the smaller vectors. The first elements of + /// GetSplitVector - Given a processed vector Op which was split into vectors + /// of half the size, this method returns the halves. The first elements of /// Op coincide with the elements of Lo; the remaining elements of Op coincide /// with the elements of Hi: Op is what you would get by concatenating Lo and /// Hi. 
For example, if Op is a v8i32 that was split into two v4i32's, then @@ -558,10 +559,10 @@ private: void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_LOAD(LoadSDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_UNDEF(SDNode *N, SDValue &Lo, SDValue &Hi); - void SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, SDValue &Lo, + void SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, SDValue &Lo, SDValue &Hi); - void SplitVecRes_VSETCC(SDNode *N, SDValue &Lo, SDValue &Hi); // Vector Operand Splitting: <128 x ty> -> 2 x <64 x ty>. bool SplitVectorOperand(SDNode *N, unsigned OpNo); @@ -641,7 +642,7 @@ private: SDValue BasePtr, const Value *SV, int SVOffset, unsigned Alignment, bool isVolatile, unsigned LdWidth, - MVT ResType, DebugLoc dl); + EVT ResType, DebugLoc dl); /// Helper genWidenVectorStores - Helper function to generate a set of /// stores to store a widen vector into non widen memory @@ -664,7 +665,7 @@ private: /// Modifies a vector input (widen or narrows) to a vector of NVT. The /// input vector must have the same element type as NVT. - SDValue ModifyToType(SDValue InOp, MVT WidenVT); + SDValue ModifyToType(SDValue InOp, EVT WidenVT); //===--------------------------------------------------------------------===// @@ -686,7 +687,7 @@ private: /// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type /// which is split (or expanded) into two not necessarily identical pieces. - void GetSplitDestVTs(MVT InVT, MVT &LoVT, MVT &HiVT); + void GetSplitDestVTs(EVT InVT, EVT &LoVT, EVT &HiVT); /// GetPairElements - Use ISD::EXTRACT_ELEMENT nodes to extract the low and /// high parts of the given value. 
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index 6e5adee..0eafe62 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -11,9 +11,11 @@ // The routines here perform legalization when the details of the type (such as // whether it is an integer or a float) do not matter. // Expansion is the act of changing a computation in an illegal type to be a -// computation in two identical registers of a smaller type. +// computation in two identical registers of a smaller type. The Lo/Hi part +// is required to be stored first in memory on little/big-endian machines. // Splitting is the act of changing a computation in an illegal type to be a // computation in two not necessarily identical registers of a smaller type. +// There are no requirements on how the type is represented in memory. // //===----------------------------------------------------------------------===// @@ -32,10 +34,10 @@ using namespace llvm; void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo, SDValue &Hi) { - MVT OutVT = N->getValueType(0); - MVT NOutVT = TLI.getTypeToTransformTo(OutVT); + EVT OutVT = N->getValueType(0); + EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); SDValue InOp = N->getOperand(0); - MVT InVT = InOp.getValueType(); + EVT InVT = InOp.getValueType(); DebugLoc dl = N->getDebugLoc(); // Handle some special cases efficiently. @@ -59,16 +61,12 @@ void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo, Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi); return; case SplitVector: - // Convert the split parts of the input if it was split in two. 
GetSplitVector(InOp, Lo, Hi); - if (Lo.getValueType() == Hi.getValueType()) { - if (TLI.isBigEndian()) - std::swap(Lo, Hi); - Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo); - Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi); - return; - } - break; + if (TLI.isBigEndian()) + std::swap(Lo, Hi); + Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo); + Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi); + return; case ScalarizeVector: // Convert the element instead. SplitInteger(BitConvertToInteger(GetScalarizedVector(InOp)), Lo, Hi); @@ -78,7 +76,7 @@ void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo, case WidenVector: { assert(!(InVT.getVectorNumElements() & 1) && "Unsupported BIT_CONVERT"); InOp = GetWidenedVector(InOp); - MVT InNVT = MVT::getVectorVT(InVT.getVectorElementType(), + EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), InVT.getVectorNumElements()/2); Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, DAG.getIntPtrConstant(0)); @@ -95,7 +93,7 @@ void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo, if (InVT.isVector() && OutVT.isInteger()) { // Handle cases like i64 = BIT_CONVERT v1i64 on x86, where the operand // is legal but the result is not. - MVT NVT = MVT::getVectorVT(NOutVT, 2); + EVT NVT = EVT::getVectorVT(*DAG.getContext(), NOutVT, 2); if (isTypeLegal(NVT)) { SDValue CastInOp = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, InOp); @@ -106,7 +104,7 @@ void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo, if (TLI.isBigEndian()) std::swap(Lo, Hi); - + return; } } @@ -117,7 +115,7 @@ void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo, // Create the stack frame object. Make sure it is aligned for both // the source and expanded destination types. 
unsigned Alignment = - TLI.getTargetData()->getPrefTypeAlignment(NOutVT.getTypeForMVT()); + TLI.getTargetData()->getPrefTypeAlignment(NOutVT.getTypeForEVT(*DAG.getContext())); SDValue StackPtr = DAG.CreateStackTemporary(InVT, Alignment); int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); const Value *SV = PseudoSourceValue::getFixedStack(SPFI); @@ -169,11 +167,11 @@ void DAGTypeLegalizer::ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo, // Convert to a vector of the expanded element type, for example // <3 x i64> -> <6 x i32>. - MVT OldVT = N->getValueType(0); - MVT NewVT = TLI.getTypeToTransformTo(OldVT); + EVT OldVT = N->getValueType(0); + EVT NewVT = TLI.getTypeToTransformTo(*DAG.getContext(), OldVT); SDValue NewVec = DAG.getNode(ISD::BIT_CONVERT, dl, - MVT::getVectorVT(NewVT, 2*OldElts), + EVT::getVectorVT(*DAG.getContext(), NewVT, 2*OldElts), OldVec); // Extract the elements at 2 * Idx and 2 * Idx + 1 from the new vector. @@ -200,7 +198,7 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo, DebugLoc dl = N->getDebugLoc(); LoadSDNode *LD = cast<LoadSDNode>(N); - MVT NVT = TLI.getTypeToTransformTo(LD->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0)); SDValue Chain = LD->getChain(); SDValue Ptr = LD->getBasePtr(); int SVOffset = LD->getSrcValueOffset(); @@ -235,7 +233,7 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo, } void DAGTypeLegalizer::ExpandRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Chain = N->getOperand(0); SDValue Ptr = N->getOperand(1); DebugLoc dl = N->getDebugLoc(); @@ -265,8 +263,8 @@ SDValue DAGTypeLegalizer::ExpandOp_BIT_CONVERT(SDNode *N) { // instead, but only if the new vector type is legal (otherwise there // is no point, and it might create expansion loops). 
For example, on // x86 this turns v1i64 = BIT_CONVERT i64 into v1i64 = BIT_CONVERT v2i32. - MVT OVT = N->getOperand(0).getValueType(); - MVT NVT = MVT::getVectorVT(TLI.getTypeToTransformTo(OVT), 2); + EVT OVT = N->getOperand(0).getValueType(); + EVT NVT = EVT::getVectorVT(*DAG.getContext(), TLI.getTypeToTransformTo(*DAG.getContext(), OVT), 2); if (isTypeLegal(NVT)) { SDValue Parts[2]; @@ -286,10 +284,10 @@ SDValue DAGTypeLegalizer::ExpandOp_BIT_CONVERT(SDNode *N) { SDValue DAGTypeLegalizer::ExpandOp_BUILD_VECTOR(SDNode *N) { // The vector type is legal but the element type needs expansion. - MVT VecVT = N->getValueType(0); + EVT VecVT = N->getValueType(0); unsigned NumElts = VecVT.getVectorNumElements(); - MVT OldVT = N->getOperand(0).getValueType(); - MVT NewVT = TLI.getTypeToTransformTo(OldVT); + EVT OldVT = N->getOperand(0).getValueType(); + EVT NewVT = TLI.getTypeToTransformTo(*DAG.getContext(), OldVT); DebugLoc dl = N->getDebugLoc(); assert(OldVT == VecVT.getVectorElementType() && @@ -310,7 +308,7 @@ SDValue DAGTypeLegalizer::ExpandOp_BUILD_VECTOR(SDNode *N) { } SDValue NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl, - MVT::getVectorVT(NewVT, NewElts.size()), + EVT::getVectorVT(*DAG.getContext(), NewVT, NewElts.size()), &NewElts[0], NewElts.size()); // Convert the new vector to the old vector type. @@ -325,20 +323,20 @@ SDValue DAGTypeLegalizer::ExpandOp_EXTRACT_ELEMENT(SDNode *N) { SDValue DAGTypeLegalizer::ExpandOp_INSERT_VECTOR_ELT(SDNode *N) { // The vector type is legal but the element type needs expansion. 
- MVT VecVT = N->getValueType(0); + EVT VecVT = N->getValueType(0); unsigned NumElts = VecVT.getVectorNumElements(); DebugLoc dl = N->getDebugLoc(); SDValue Val = N->getOperand(1); - MVT OldEVT = Val.getValueType(); - MVT NewEVT = TLI.getTypeToTransformTo(OldEVT); + EVT OldEVT = Val.getValueType(); + EVT NewEVT = TLI.getTypeToTransformTo(*DAG.getContext(), OldEVT); assert(OldEVT == VecVT.getVectorElementType() && "Inserted element type doesn't match vector element type!"); // Bitconvert to a vector of twice the length with elements of the expanded // type, insert the expanded vector elements, and then convert back. - MVT NewVecVT = MVT::getVectorVT(NewEVT, NumElts*2); + EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewEVT, NumElts*2); SDValue NewVec = DAG.getNode(ISD::BIT_CONVERT, dl, NewVecVT, N->getOperand(0)); @@ -360,7 +358,7 @@ SDValue DAGTypeLegalizer::ExpandOp_INSERT_VECTOR_ELT(SDNode *N) { SDValue DAGTypeLegalizer::ExpandOp_SCALAR_TO_VECTOR(SDNode *N) { DebugLoc dl = N->getDebugLoc(); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); assert(VT.getVectorElementType() == N->getOperand(0).getValueType() && "SCALAR_TO_VECTOR operand type doesn't match vector element type!"); unsigned NumElts = VT.getVectorNumElements(); @@ -378,7 +376,7 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) { DebugLoc dl = N->getDebugLoc(); StoreSDNode *St = cast<StoreSDNode>(N); - MVT NVT = TLI.getTypeToTransformTo(St->getValue().getValueType()); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), St->getValue().getValueType()); SDValue Chain = St->getChain(); SDValue Ptr = St->getBasePtr(); int SVOffset = St->getSrcValueOffset(); @@ -464,7 +462,7 @@ void DAGTypeLegalizer::SplitRes_SELECT_CC(SDNode *N, SDValue &Lo, } void DAGTypeLegalizer::SplitRes_UNDEF(SDNode *N, SDValue &Lo, SDValue &Hi) { - MVT LoVT, HiVT; + EVT LoVT, HiVT; DebugLoc dl = N->getDebugLoc(); GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); Lo = DAG.getUNDEF(LoVT); 
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 335c73c..ca19430 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -20,8 +20,8 @@ // type i8 which must be promoted. // // This does not legalize vector manipulations like ISD::BUILD_VECTOR, -// or operations that happen to take a vector which are custom-lowered like -// ISD::CALL; the legalization for such operations never produces nodes +// or operations that happen to take a vector which are custom-lowered; +// the legalization for such operations never produces nodes // with illegal types, so it's okay to put off legalizing them until // SelectionDAG::Legalize runs. // @@ -129,7 +129,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { if (!HasVectorValue) return TranslateLegalizeResults(Op, Result); - MVT QueryType; + EVT QueryType; switch (Op.getOpcode()) { default: return TranslateLegalizeResults(Op, Result); @@ -231,10 +231,10 @@ SDValue VectorLegalizer::PromoteVectorOp(SDValue Op) { // Vector "promotion" is basically just bitcasting and doing the operation // in a different type. For example, x86 promotes ISD::AND on v2i32 to // v1i64. 
- MVT VT = Op.getValueType(); + EVT VT = Op.getValueType(); assert(Op.getNode()->getNumValues() == 1 && "Can't promote a vector with multiple results!"); - MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT); + EVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT); DebugLoc dl = Op.getDebugLoc(); SmallVector<SDValue, 4> Operands(Op.getNumOperands()); @@ -260,11 +260,11 @@ SDValue VectorLegalizer::ExpandFNEG(SDValue Op) { } SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) { - MVT VT = Op.getValueType(); + EVT VT = Op.getValueType(); unsigned NumElems = VT.getVectorNumElements(); - MVT EltVT = VT.getVectorElementType(); + EVT EltVT = VT.getVectorElementType(); SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1), CC = Op.getOperand(2); - MVT TmpEltVT = LHS.getValueType().getVectorElementType(); + EVT TmpEltVT = LHS.getValueType().getVectorElementType(); DebugLoc dl = Op.getDebugLoc(); SmallVector<SDValue, 8> Ops(NumElems); for (unsigned i = 0; i < NumElems; ++i) { @@ -287,11 +287,11 @@ SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) { /// the operation be expanded. "Unroll" the vector, splitting out the scalars /// and operating on each element individually. SDValue VectorLegalizer::UnrollVectorOp(SDValue Op) { - MVT VT = Op.getValueType(); + EVT VT = Op.getValueType(); assert(Op.getNode()->getNumValues() == 1 && "Can't unroll a vector with multiple results!"); unsigned NE = VT.getVectorNumElements(); - MVT EltVT = VT.getVectorElementType(); + EVT EltVT = VT.getVectorElementType(); DebugLoc dl = Op.getDebugLoc(); SmallVector<SDValue, 8> Scalars; @@ -299,10 +299,10 @@ SDValue VectorLegalizer::UnrollVectorOp(SDValue Op) { for (unsigned i = 0; i != NE; ++i) { for (unsigned j = 0; j != Op.getNumOperands(); ++j) { SDValue Operand = Op.getOperand(j); - MVT OperandVT = Operand.getValueType(); + EVT OperandVT = Operand.getValueType(); if (OperandVT.isVector()) { // A vector operand; extract a single element. 
- MVT OperandEltVT = OperandVT.getVectorElementType(); + EVT OperandEltVT = OperandVT.getVectorElementType(); Operands[j] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, OperandEltVT, Operand, diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 68967cc..a03f825 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -15,14 +15,16 @@ // eventually decomposes to scalars if the target doesn't support v4f32 or v2f32 // types. // Splitting is the act of changing a computation in an invalid vector type to -// be a computation in multiple vectors of a smaller type. For example, -// implementing <128 x f32> operations in terms of two <64 x f32> operations. +// be a computation in two vectors of half the size. For example, implementing +// <128 x f32> operations in terms of two <64 x f32> operations. // //===----------------------------------------------------------------------===// #include "LegalizeTypes.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/Target/TargetData.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; //===----------------------------------------------------------------------===// @@ -30,18 +32,19 @@ using namespace llvm; //===----------------------------------------------------------------------===// void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { - DEBUG(cerr << "Scalarize node result " << ResNo << ": "; N->dump(&DAG); - cerr << "\n"); + DEBUG(errs() << "Scalarize node result " << ResNo << ": "; + N->dump(&DAG); + errs() << "\n"); SDValue R = SDValue(); switch (N->getOpcode()) { default: #ifndef NDEBUG - cerr << "ScalarizeVectorResult #" << ResNo << ": "; - N->dump(&DAG); cerr << "\n"; + errs() << "ScalarizeVectorResult #" << ResNo << ": "; + N->dump(&DAG); + errs() << "\n"; #endif - assert(0 && "Do not know how to scalarize the result 
of this operator!"); - abort(); + llvm_unreachable("Do not know how to scalarize the result of this operator!"); case ISD::BIT_CONVERT: R = ScalarizeVecRes_BIT_CONVERT(N); break; case ISD::BUILD_VECTOR: R = N->getOperand(0); break; @@ -53,6 +56,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::SCALAR_TO_VECTOR: R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break; case ISD::SELECT: R = ScalarizeVecRes_SELECT(N); break; case ISD::SELECT_CC: R = ScalarizeVecRes_SELECT_CC(N); break; + case ISD::SETCC: R = ScalarizeVecRes_SETCC(N); break; case ISD::UNDEF: R = ScalarizeVecRes_UNDEF(N); break; case ISD::VECTOR_SHUFFLE: R = ScalarizeVecRes_VECTOR_SHUFFLE(N); break; case ISD::VSETCC: R = ScalarizeVecRes_VSETCC(N); break; @@ -72,9 +76,14 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::FCEIL: case ISD::FRINT: case ISD::FNEARBYINT: + case ISD::UINT_TO_FP: case ISD::SINT_TO_FP: case ISD::TRUNCATE: - case ISD::UINT_TO_FP: R = ScalarizeVecRes_UnaryOp(N); break; + case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: + case ISD::ANY_EXTEND: + R = ScalarizeVecRes_UnaryOp(N); + break; case ISD::ADD: case ISD::AND: @@ -91,11 +100,12 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::SUB: case ISD::UDIV: case ISD::UREM: - case ISD::XOR: R = ScalarizeVecRes_BinOp(N); break; - + case ISD::XOR: case ISD::SHL: case ISD::SRA: - case ISD::SRL: R = ScalarizeVecRes_ShiftOp(N); break; + case ISD::SRL: + R = ScalarizeVecRes_BinOp(N); + break; } // If R is null, the sub-method took care of registering the result. 
@@ -110,21 +120,14 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_BinOp(SDNode *N) { LHS.getValueType(), LHS, RHS); } -SDValue DAGTypeLegalizer::ScalarizeVecRes_ShiftOp(SDNode *N) { - SDValue LHS = GetScalarizedVector(N->getOperand(0)); - SDValue ShiftAmt = GetScalarizedVector(N->getOperand(1)); - return DAG.getNode(N->getOpcode(), N->getDebugLoc(), - LHS.getValueType(), LHS, ShiftAmt); -} - SDValue DAGTypeLegalizer::ScalarizeVecRes_BIT_CONVERT(SDNode *N) { - MVT NewVT = N->getValueType(0).getVectorElementType(); + EVT NewVT = N->getValueType(0).getVectorElementType(); return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), NewVT, N->getOperand(0)); } SDValue DAGTypeLegalizer::ScalarizeVecRes_CONVERT_RNDSAT(SDNode *N) { - MVT NewVT = N->getValueType(0).getVectorElementType(); + EVT NewVT = N->getValueType(0).getVectorElementType(); SDValue Op0 = GetScalarizedVector(N->getOperand(0)); return DAG.getConvertRndSat(NewVT, N->getDebugLoc(), Op0, DAG.getValueType(NewVT), @@ -150,7 +153,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N) { // The value to insert may have a wider type than the vector element type, // so be sure to truncate it to the element type if necessary. SDValue Op = N->getOperand(1); - MVT EltVT = N->getValueType(0).getVectorElementType(); + EVT EltVT = N->getValueType(0).getVectorElementType(); if (Op.getValueType() != EltVT) // FIXME: Can this happen for floating point types? Op = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), EltVT, Op); @@ -167,7 +170,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) { DAG.getUNDEF(N->getBasePtr().getValueType()), N->getSrcValue(), N->getSrcValueOffset(), N->getMemoryVT().getVectorElementType(), - N->isVolatile(), N->getAlignment()); + N->isVolatile(), N->getOriginalAlignment()); // Legalized the chain result - switch anything that used the old chain to // use the new one. 
@@ -177,7 +180,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) { SDValue DAGTypeLegalizer::ScalarizeVecRes_UnaryOp(SDNode *N) { // Get the dest type - it doesn't always match the input type, e.g. int_to_fp. - MVT DestVT = N->getValueType(0).getVectorElementType(); + EVT DestVT = N->getValueType(0).getVectorElementType(); SDValue Op = GetScalarizedVector(N->getOperand(0)); return DAG.getNode(N->getOpcode(), N->getDebugLoc(), DestVT, Op); } @@ -185,7 +188,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_UnaryOp(SDNode *N) { SDValue DAGTypeLegalizer::ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N) { // If the operand is wider than the vector element type then it is implicitly // truncated. Make that explicit here. - MVT EltVT = N->getValueType(0).getVectorElementType(); + EVT EltVT = N->getValueType(0).getVectorElementType(); SDValue InOp = N->getOperand(0); if (InOp.getValueType() != EltVT) return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), EltVT, InOp); @@ -207,6 +210,15 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT_CC(SDNode *N) { N->getOperand(4)); } +SDValue DAGTypeLegalizer::ScalarizeVecRes_SETCC(SDNode *N) { + SDValue LHS = GetScalarizedVector(N->getOperand(0)); + SDValue RHS = GetScalarizedVector(N->getOperand(1)); + DebugLoc DL = N->getDebugLoc(); + + // Turn it into a scalar SETCC. 
+ return DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS, N->getOperand(2)); +} + SDValue DAGTypeLegalizer::ScalarizeVecRes_UNDEF(SDNode *N) { return DAG.getUNDEF(N->getValueType(0).getVectorElementType()); } @@ -223,12 +235,12 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N) { SDValue DAGTypeLegalizer::ScalarizeVecRes_VSETCC(SDNode *N) { SDValue LHS = GetScalarizedVector(N->getOperand(0)); SDValue RHS = GetScalarizedVector(N->getOperand(1)); - MVT NVT = N->getValueType(0).getVectorElementType(); - MVT SVT = TLI.getSetCCResultType(LHS.getValueType()); - DebugLoc dl = N->getDebugLoc(); + EVT NVT = N->getValueType(0).getVectorElementType(); + EVT SVT = TLI.getSetCCResultType(LHS.getValueType()); + DebugLoc DL = N->getDebugLoc(); // Turn it into a scalar SETCC. - SDValue Res = DAG.getNode(ISD::SETCC, dl, SVT, LHS, RHS, N->getOperand(2)); + SDValue Res = DAG.getNode(ISD::SETCC, DL, SVT, LHS, RHS, N->getOperand(2)); // VSETCC always returns a sign-extended value, while SETCC may not. The // SETCC result type may not match the vector element type. Correct these. @@ -237,19 +249,19 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VSETCC(SDNode *N) { // Ensure the SETCC result is sign-extended. if (TLI.getBooleanContents() != TargetLowering::ZeroOrNegativeOneBooleanContent) - Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, SVT, Res, + Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, SVT, Res, DAG.getValueType(MVT::i1)); // Truncate to the final type. - return DAG.getNode(ISD::TRUNCATE, dl, NVT, Res); - } else { - // The SETCC result type is smaller than the vector element type. - // If the SetCC result is not sign-extended, chop it down to MVT::i1. - if (TLI.getBooleanContents() != - TargetLowering::ZeroOrNegativeOneBooleanContent) - Res = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Res); - // Sign extend to the final type. 
- return DAG.getNode(ISD::SIGN_EXTEND, dl, NVT, Res); + return DAG.getNode(ISD::TRUNCATE, DL, NVT, Res); } + + // The SETCC result type is smaller than the vector element type. + // If the SetCC result is not sign-extended, chop it down to MVT::i1. + if (TLI.getBooleanContents() != + TargetLowering::ZeroOrNegativeOneBooleanContent) + Res = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Res); + // Sign extend to the final type. + return DAG.getNode(ISD::SIGN_EXTEND, DL, NVT, Res); } @@ -258,31 +270,32 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VSETCC(SDNode *N) { //===----------------------------------------------------------------------===// bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) { - DEBUG(cerr << "Scalarize node operand " << OpNo << ": "; N->dump(&DAG); - cerr << "\n"); + DEBUG(errs() << "Scalarize node operand " << OpNo << ": "; + N->dump(&DAG); + errs() << "\n"); SDValue Res = SDValue(); if (Res.getNode() == 0) { switch (N->getOpcode()) { default: #ifndef NDEBUG - cerr << "ScalarizeVectorOperand Op #" << OpNo << ": "; - N->dump(&DAG); cerr << "\n"; + errs() << "ScalarizeVectorOperand Op #" << OpNo << ": "; + N->dump(&DAG); + errs() << "\n"; #endif - assert(0 && "Do not know how to scalarize this operator's operand!"); - abort(); - + llvm_unreachable("Do not know how to scalarize this operator's operand!"); case ISD::BIT_CONVERT: - Res = ScalarizeVecOp_BIT_CONVERT(N); break; - + Res = ScalarizeVecOp_BIT_CONVERT(N); + break; case ISD::CONCAT_VECTORS: - Res = ScalarizeVecOp_CONCAT_VECTORS(N); break; - + Res = ScalarizeVecOp_CONCAT_VECTORS(N); + break; case ISD::EXTRACT_VECTOR_ELT: - Res = ScalarizeVecOp_EXTRACT_VECTOR_ELT(N); break; - + Res = ScalarizeVecOp_EXTRACT_VECTOR_ELT(N); + break; case ISD::STORE: - Res = ScalarizeVecOp_STORE(cast<StoreSDNode>(N), OpNo); break; + Res = ScalarizeVecOp_STORE(cast<StoreSDNode>(N), OpNo); + break; } } @@ -323,7 +336,11 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_CONCAT_VECTORS(SDNode *N) { /// be 
scalarized, it must be <1 x ty>, so just return the element, ignoring the /// index. SDValue DAGTypeLegalizer::ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { - return GetScalarizedVector(N->getOperand(0)); + SDValue Res = GetScalarizedVector(N->getOperand(0)); + if (Res.getValueType() != N->getValueType(0)) + Res = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), N->getValueType(0), + Res); + return Res; } /// ScalarizeVecOp_STORE - If the value to store is a vector that needs to be @@ -343,7 +360,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){ return DAG.getStore(N->getChain(), dl, GetScalarizedVector(N->getOperand(1)), N->getBasePtr(), N->getSrcValue(), N->getSrcValueOffset(), - N->isVolatile(), N->getAlignment()); + N->isVolatile(), N->getOriginalAlignment()); } @@ -357,17 +374,19 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){ /// legalization, we just know that (at least) one result needs vector /// splitting. void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { - DEBUG(cerr << "Split node result: "; N->dump(&DAG); cerr << "\n"); + DEBUG(errs() << "Split node result: "; + N->dump(&DAG); + errs() << "\n"); SDValue Lo, Hi; switch (N->getOpcode()) { default: #ifndef NDEBUG - cerr << "SplitVectorResult #" << ResNo << ": "; - N->dump(&DAG); cerr << "\n"; + errs() << "SplitVectorResult #" << ResNo << ": "; + N->dump(&DAG); + errs() << "\n"; #endif - assert(0 && "Do not know how to split the result of this operator!"); - abort(); + llvm_unreachable("Do not know how to split the result of this operator!"); case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, Lo, Hi); break; case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break; @@ -382,10 +401,16 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::FPOWI: SplitVecRes_FPOWI(N, Lo, Hi); break; case ISD::INSERT_VECTOR_ELT: SplitVecRes_INSERT_VECTOR_ELT(N, Lo, Hi); break; case ISD::SCALAR_TO_VECTOR: 
SplitVecRes_SCALAR_TO_VECTOR(N, Lo, Hi); break; - case ISD::LOAD: SplitVecRes_LOAD(cast<LoadSDNode>(N), Lo, Hi);break; + case ISD::LOAD: + SplitVecRes_LOAD(cast<LoadSDNode>(N), Lo, Hi); + break; + case ISD::SETCC: + case ISD::VSETCC: + SplitVecRes_SETCC(N, Lo, Hi); + break; case ISD::VECTOR_SHUFFLE: - SplitVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N), Lo, Hi); break; - case ISD::VSETCC: SplitVecRes_VSETCC(N, Lo, Hi); break; + SplitVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N), Lo, Hi); + break; case ISD::CTTZ: case ISD::CTLZ: @@ -403,8 +428,13 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: case ISD::TRUNCATE: - case ISD::UINT_TO_FP: SplitVecRes_UnaryOp(N, Lo, Hi); break; + case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: + case ISD::ANY_EXTEND: + SplitVecRes_UnaryOp(N, Lo, Hi); + break; case ISD::ADD: case ISD::SUB: @@ -424,7 +454,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::SRL: case ISD::UREM: case ISD::SREM: - case ISD::FREM: SplitVecRes_BinOp(N, Lo, Hi); break; + case ISD::FREM: + SplitVecRes_BinOp(N, Lo, Hi); + break; } // If Lo/Hi is null, the sub-method took care of registering results etc. @@ -448,12 +480,12 @@ void DAGTypeLegalizer::SplitVecRes_BIT_CONVERT(SDNode *N, SDValue &Lo, SDValue &Hi) { // We know the result is a vector. The input may be either a vector or a // scalar value. - MVT LoVT, HiVT; + EVT LoVT, HiVT; GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); DebugLoc dl = N->getDebugLoc(); SDValue InOp = N->getOperand(0); - MVT InVT = InOp.getValueType(); + EVT InVT = InOp.getValueType(); // Handle some special cases efficiently. switch (getTypeAction(InVT)) { @@ -488,8 +520,8 @@ void DAGTypeLegalizer::SplitVecRes_BIT_CONVERT(SDNode *N, SDValue &Lo, } // In the general case, convert the input to an integer and split it by hand. 
- MVT LoIntVT = MVT::getIntegerVT(LoVT.getSizeInBits()); - MVT HiIntVT = MVT::getIntegerVT(HiVT.getSizeInBits()); + EVT LoIntVT = EVT::getIntegerVT(*DAG.getContext(), LoVT.getSizeInBits()); + EVT HiIntVT = EVT::getIntegerVT(*DAG.getContext(), HiVT.getSizeInBits()); if (TLI.isBigEndian()) std::swap(LoIntVT, HiIntVT); @@ -503,7 +535,7 @@ void DAGTypeLegalizer::SplitVecRes_BIT_CONVERT(SDNode *N, SDValue &Lo, void DAGTypeLegalizer::SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi) { - MVT LoVT, HiVT; + EVT LoVT, HiVT; DebugLoc dl = N->getDebugLoc(); GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); unsigned LoNumElts = LoVT.getVectorNumElements(); @@ -525,7 +557,7 @@ void DAGTypeLegalizer::SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, return; } - MVT LoVT, HiVT; + EVT LoVT, HiVT; GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); SmallVector<SDValue, 8> LoOps(N->op_begin(), N->op_begin()+NumSubvectors); @@ -537,7 +569,7 @@ void DAGTypeLegalizer::SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, void DAGTypeLegalizer::SplitVecRes_CONVERT_RNDSAT(SDNode *N, SDValue &Lo, SDValue &Hi) { - MVT LoVT, HiVT; + EVT LoVT, HiVT; DebugLoc dl = N->getDebugLoc(); GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); @@ -550,12 +582,11 @@ void DAGTypeLegalizer::SplitVecRes_CONVERT_RNDSAT(SDNode *N, SDValue &Lo, // Split the input. 
SDValue VLo, VHi; - MVT InVT = N->getOperand(0).getValueType(); + EVT InVT = N->getOperand(0).getValueType(); switch (getTypeAction(InVT)) { - default: assert(0 && "Unexpected type action!"); + default: llvm_unreachable("Unexpected type action!"); case Legal: { - assert(LoVT == HiVT && "Legal non-power-of-two vector type?"); - MVT InNVT = MVT::getVectorVT(InVT.getVectorElementType(), + EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), LoVT.getVectorNumElements()); VLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0), DAG.getIntPtrConstant(0)); @@ -570,9 +601,8 @@ void DAGTypeLegalizer::SplitVecRes_CONVERT_RNDSAT(SDNode *N, SDValue &Lo, // If the result needs to be split and the input needs to be widened, // the two types must have different lengths. Use the widened result // and extract from it to do the split. - assert(LoVT == HiVT && "Legal non-power-of-two vector type?"); SDValue InOp = GetWidenedVector(N->getOperand(0)); - MVT InNVT = MVT::getVectorVT(InVT.getVectorElementType(), + EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), LoVT.getVectorNumElements()); VLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, DAG.getIntPtrConstant(0)); @@ -595,14 +625,11 @@ void DAGTypeLegalizer::SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Vec = N->getOperand(0); SDValue Idx = N->getOperand(1); - MVT IdxVT = Idx.getValueType(); + EVT IdxVT = Idx.getValueType(); DebugLoc dl = N->getDebugLoc(); - MVT LoVT, HiVT; + EVT LoVT, HiVT; GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); - // The indices are not guaranteed to be a multiple of the new vector - // size unless the original vector type was split in two. 
- assert(LoVT == HiVT && "Non power-of-two vectors not supported!"); Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, LoVT, Vec, Idx); Idx = DAG.getNode(ISD::ADD, dl, IdxVT, Idx, @@ -639,8 +666,8 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, } // Spill the vector to the stack. - MVT VecVT = Vec.getValueType(); - MVT EltVT = VecVT.getVectorElementType(); + EVT VecVT = Vec.getValueType(); + EVT EltVT = VecVT.getVectorElementType(); SDValue StackPtr = DAG.CreateStackTemporary(VecVT); SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, NULL, 0); @@ -648,7 +675,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, // so use a truncating store. SDValue EltPtr = GetVectorElementPointer(StackPtr, EltVT, Idx); unsigned Alignment = - TLI.getTargetData()->getPrefTypeAlignment(VecVT.getTypeForMVT()); + TLI.getTargetData()->getPrefTypeAlignment(VecVT.getTypeForEVT(*DAG.getContext())); Store = DAG.getTruncStore(Store, dl, Elt, EltPtr, NULL, 0, EltVT); // Load the Lo part from the stack slot. 
@@ -666,7 +693,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, void DAGTypeLegalizer::SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi) { - MVT LoVT, HiVT; + EVT LoVT, HiVT; DebugLoc dl = N->getDebugLoc(); GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); Lo = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LoVT, N->getOperand(0)); @@ -676,7 +703,7 @@ void DAGTypeLegalizer::SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, SDValue &Hi) { assert(ISD::isUNINDEXEDLoad(LD) && "Indexed load during type legalization!"); - MVT LoVT, HiVT; + EVT LoVT, HiVT; DebugLoc dl = LD->getDebugLoc(); GetSplitDestVTs(LD->getValueType(0), LoVT, HiVT); @@ -686,11 +713,11 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, SDValue Offset = DAG.getUNDEF(Ptr.getValueType()); const Value *SV = LD->getSrcValue(); int SVOffset = LD->getSrcValueOffset(); - MVT MemoryVT = LD->getMemoryVT(); - unsigned Alignment = LD->getAlignment(); + EVT MemoryVT = LD->getMemoryVT(); + unsigned Alignment = LD->getOriginalAlignment(); bool isVolatile = LD->isVolatile(); - MVT LoMemVT, HiMemVT; + EVT LoMemVT, HiMemVT; GetSplitDestVTs(MemoryVT, LoMemVT, HiMemVT); Lo = DAG.getLoad(ISD::UNINDEXED, dl, ExtType, LoVT, Ch, Ptr, Offset, @@ -700,7 +727,6 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getIntPtrConstant(IncrementSize)); SVOffset += IncrementSize; - Alignment = MinAlign(Alignment, IncrementSize); Hi = DAG.getLoad(ISD::UNINDEXED, dl, ExtType, HiVT, Ch, Ptr, Offset, SV, SVOffset, HiMemVT, isVolatile, Alignment); @@ -714,20 +740,43 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, ReplaceValueWith(SDValue(LD, 1), Ch); } +void DAGTypeLegalizer::SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi) { + EVT LoVT, HiVT; + DebugLoc DL = N->getDebugLoc(); + 
GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); + + // Split the input. + EVT InVT = N->getOperand(0).getValueType(); + SDValue LL, LH, RL, RH; + EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), + LoVT.getVectorNumElements()); + LL = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, N->getOperand(0), + DAG.getIntPtrConstant(0)); + LH = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, N->getOperand(0), + DAG.getIntPtrConstant(InNVT.getVectorNumElements())); + + RL = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, N->getOperand(1), + DAG.getIntPtrConstant(0)); + RH = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, N->getOperand(1), + DAG.getIntPtrConstant(InNVT.getVectorNumElements())); + + Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2)); + Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2)); +} + void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, SDValue &Hi) { // Get the dest types - they may not match the input types, e.g. int_to_fp. - MVT LoVT, HiVT; + EVT LoVT, HiVT; DebugLoc dl = N->getDebugLoc(); GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); // Split the input. - MVT InVT = N->getOperand(0).getValueType(); + EVT InVT = N->getOperand(0).getValueType(); switch (getTypeAction(InVT)) { - default: assert(0 && "Unexpected type action!"); + default: llvm_unreachable("Unexpected type action!"); case Legal: { - assert(LoVT == HiVT && "Legal non-power-of-two vector type?"); - MVT InNVT = MVT::getVectorVT(InVT.getVectorElementType(), + EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), LoVT.getVectorNumElements()); Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0), DAG.getIntPtrConstant(0)); @@ -742,9 +791,8 @@ void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, // If the result needs to be split and the input needs to be widened, // the two types must have different lengths. 
Use the widened result // and extract from it to do the split. - assert(LoVT == HiVT && "Legal non-power-of-two vector type?"); SDValue InOp = GetWidenedVector(N->getOperand(0)); - MVT InNVT = MVT::getVectorVT(InVT.getVectorElementType(), + EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), LoVT.getVectorNumElements()); Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, DAG.getIntPtrConstant(0)); @@ -765,10 +813,8 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, DebugLoc dl = N->getDebugLoc(); GetSplitVector(N->getOperand(0), Inputs[0], Inputs[1]); GetSplitVector(N->getOperand(1), Inputs[2], Inputs[3]); - MVT NewVT = Inputs[0].getValueType(); + EVT NewVT = Inputs[0].getValueType(); unsigned NewElts = NewVT.getVectorNumElements(); - assert(NewVT == Inputs[1].getValueType() && - "Non power-of-two vectors not supported!"); // If Lo or Hi uses elements from at most two of the four input vectors, then // express it as a vector shuffle of those two inputs. Otherwise extract the @@ -825,7 +871,7 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, } if (useBuildVector) { - MVT EltVT = NewVT.getVectorElementType(); + EVT EltVT = NewVT.getVectorElementType(); SmallVector<SDValue, 16> SVOps; // Extract the input elements by hand. 
@@ -868,20 +914,6 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, } } -void DAGTypeLegalizer::SplitVecRes_VSETCC(SDNode *N, SDValue &Lo, - SDValue &Hi) { - MVT LoVT, HiVT; - DebugLoc dl = N->getDebugLoc(); - GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); - - SDValue LL, LH, RL, RH; - GetSplitVector(N->getOperand(0), LL, LH); - GetSplitVector(N->getOperand(1), RL, RH); - - Lo = DAG.getNode(ISD::VSETCC, dl, LoVT, LL, RL, N->getOperand(2)); - Hi = DAG.getNode(ISD::VSETCC, dl, HiVT, LH, RH, N->getOperand(2)); -} - //===----------------------------------------------------------------------===// // Operand Vector Splitting @@ -892,24 +924,27 @@ void DAGTypeLegalizer::SplitVecRes_VSETCC(SDNode *N, SDValue &Lo, /// result types of the node are known to be legal, but other operands of the /// node may need legalization as well as the specified one. bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { - DEBUG(cerr << "Split node operand: "; N->dump(&DAG); cerr << "\n"); + DEBUG(errs() << "Split node operand: "; + N->dump(&DAG); + errs() << "\n"); SDValue Res = SDValue(); if (Res.getNode() == 0) { switch (N->getOpcode()) { default: #ifndef NDEBUG - cerr << "SplitVectorOperand Op #" << OpNo << ": "; - N->dump(&DAG); cerr << "\n"; + errs() << "SplitVectorOperand Op #" << OpNo << ": "; + N->dump(&DAG); + errs() << "\n"; #endif - assert(0 && "Do not know how to split this operator's operand!"); - abort(); + llvm_unreachable("Do not know how to split this operator's operand!"); case ISD::BIT_CONVERT: Res = SplitVecOp_BIT_CONVERT(N); break; case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break; case ISD::EXTRACT_VECTOR_ELT:Res = SplitVecOp_EXTRACT_VECTOR_ELT(N); break; - case ISD::STORE: Res = SplitVecOp_STORE(cast<StoreSDNode>(N), - OpNo); break; + case ISD::STORE: + Res = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo); + break; case ISD::CTTZ: case ISD::CTLZ: @@ -917,8 +952,13 @@ bool 
DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: case ISD::TRUNCATE: - case ISD::UINT_TO_FP: Res = SplitVecOp_UnaryOp(N); break; + case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: + case ISD::ANY_EXTEND: + Res = SplitVecOp_UnaryOp(N); + break; } } @@ -939,15 +979,13 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { SDValue DAGTypeLegalizer::SplitVecOp_UnaryOp(SDNode *N) { // The result has a legal vector type, but the input needs splitting. - MVT ResVT = N->getValueType(0); + EVT ResVT = N->getValueType(0); SDValue Lo, Hi; DebugLoc dl = N->getDebugLoc(); GetSplitVector(N->getOperand(0), Lo, Hi); - assert(Lo.getValueType() == Hi.getValueType() && - "Returns legal non-power-of-two vector type?"); - MVT InVT = Lo.getValueType(); + EVT InVT = Lo.getValueType(); - MVT OutVT = MVT::getVectorVT(ResVT.getVectorElementType(), + EVT OutVT = EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(), InVT.getVectorNumElements()); Lo = DAG.getNode(N->getOpcode(), dl, OutVT, Lo); @@ -975,7 +1013,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_BIT_CONVERT(SDNode *N) { SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N) { // We know that the extracted result type is legal. For now, assume the index // is a constant. 
- MVT SubVT = N->getValueType(0); + EVT SubVT = N->getValueType(0); SDValue Idx = N->getOperand(1); DebugLoc dl = N->getDebugLoc(); SDValue Lo, Hi; @@ -997,7 +1035,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N) { SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { SDValue Vec = N->getOperand(0); SDValue Idx = N->getOperand(1); - MVT VecVT = Vec.getValueType(); + EVT VecVT = Vec.getValueType(); if (isa<ConstantSDNode>(Idx)) { uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); @@ -1010,14 +1048,13 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { if (IdxVal < LoElts) return DAG.UpdateNodeOperands(SDValue(N, 0), Lo, Idx); - else - return DAG.UpdateNodeOperands(SDValue(N, 0), Hi, - DAG.getConstant(IdxVal - LoElts, - Idx.getValueType())); + return DAG.UpdateNodeOperands(SDValue(N, 0), Hi, + DAG.getConstant(IdxVal - LoElts, + Idx.getValueType())); } // Store the vector to the stack. - MVT EltVT = VecVT.getVectorElementType(); + EVT EltVT = VecVT.getVectorElementType(); DebugLoc dl = N->getDebugLoc(); SDValue StackPtr = DAG.CreateStackTemporary(VecVT); int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); @@ -1026,7 +1063,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { // Load back the required element. 
StackPtr = GetVectorElementPointer(StackPtr, EltVT, Idx); - return DAG.getLoad(EltVT, dl, Store, StackPtr, SV, 0); + return DAG.getExtLoad(ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr, + SV, 0, EltVT); } SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { @@ -1038,13 +1076,13 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { SDValue Ch = N->getChain(); SDValue Ptr = N->getBasePtr(); int SVOffset = N->getSrcValueOffset(); - MVT MemoryVT = N->getMemoryVT(); - unsigned Alignment = N->getAlignment(); + EVT MemoryVT = N->getMemoryVT(); + unsigned Alignment = N->getOriginalAlignment(); bool isVol = N->isVolatile(); SDValue Lo, Hi; GetSplitVector(N->getOperand(1), Lo, Hi); - MVT LoMemVT, HiMemVT; + EVT LoMemVT, HiMemVT; GetSplitDestVTs(MemoryVT, LoMemVT, HiMemVT); unsigned IncrementSize = LoMemVT.getSizeInBits()/8; @@ -1059,15 +1097,14 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { // Increment the pointer to the other half. 
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getIntPtrConstant(IncrementSize)); + SVOffset += IncrementSize; if (isTruncating) - Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, - N->getSrcValue(), SVOffset+IncrementSize, - HiMemVT, - isVol, MinAlign(Alignment, IncrementSize)); + Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getSrcValue(), SVOffset, + HiMemVT, isVol, Alignment); else - Hi = DAG.getStore(Ch, dl, Hi, Ptr, N->getSrcValue(), SVOffset+IncrementSize, - isVol, MinAlign(Alignment, IncrementSize)); + Hi = DAG.getStore(Ch, dl, Hi, Ptr, N->getSrcValue(), SVOffset, + isVol, Alignment); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); } @@ -1078,18 +1115,19 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { //===----------------------------------------------------------------------===// void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { - DEBUG(cerr << "Widen node result " << ResNo << ": "; N->dump(&DAG); - cerr << "\n"); + DEBUG(errs() << "Widen node result " << ResNo << ": "; + N->dump(&DAG); + errs() << "\n"); SDValue Res = SDValue(); switch (N->getOpcode()) { default: #ifndef NDEBUG - cerr << "WidenVectorResult #" << ResNo << ": "; - N->dump(&DAG); cerr << "\n"; + errs() << "WidenVectorResult #" << ResNo << ": "; + N->dump(&DAG); + errs() << "\n"; #endif - assert(0 && "Do not know how to widen the result of this operator!"); - abort(); + llvm_unreachable("Do not know how to widen the result of this operator!"); case ISD::BIT_CONVERT: Res = WidenVecRes_BIT_CONVERT(N); break; case ISD::BUILD_VECTOR: Res = WidenVecRes_BUILD_VECTOR(N); break; @@ -1102,9 +1140,12 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::SELECT: Res = WidenVecRes_SELECT(N); break; case ISD::SELECT_CC: Res = WidenVecRes_SELECT_CC(N); break; case ISD::UNDEF: Res = WidenVecRes_UNDEF(N); break; - case ISD::VECTOR_SHUFFLE: - Res = WidenVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N)); break; - 
case ISD::VSETCC: Res = WidenVecRes_VSETCC(N); break; + case ISD::VECTOR_SHUFFLE: + Res = WidenVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N)); + break; + case ISD::VSETCC: + Res = WidenVecRes_VSETCC(N); + break; case ISD::ADD: case ISD::AND: @@ -1126,21 +1167,27 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::UDIV: case ISD::UREM: case ISD::SUB: - case ISD::XOR: Res = WidenVecRes_Binary(N); break; + case ISD::XOR: + Res = WidenVecRes_Binary(N); + break; case ISD::SHL: case ISD::SRA: - case ISD::SRL: Res = WidenVecRes_Shift(N); break; + case ISD::SRL: + Res = WidenVecRes_Shift(N); + break; - case ISD::ANY_EXTEND: case ISD::FP_ROUND: case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: - case ISD::SIGN_EXTEND: case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: case ISD::TRUNCATE: + case ISD::SIGN_EXTEND: case ISD::ZERO_EXTEND: - case ISD::UINT_TO_FP: Res = WidenVecRes_Convert(N); break; + case ISD::ANY_EXTEND: + Res = WidenVecRes_Convert(N); + break; case ISD::CTLZ: case ISD::CTPOP: @@ -1149,7 +1196,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::FCOS: case ISD::FNEG: case ISD::FSIN: - case ISD::FSQRT: Res = WidenVecRes_Unary(N); break; + case ISD::FSQRT: + Res = WidenVecRes_Unary(N); + break; } // If Res is null, the sub-method took care of registering the result. @@ -1159,7 +1208,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { // Binary op widening. 
- MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue InOp1 = GetWidenedVector(N->getOperand(0)); SDValue InOp2 = GetWidenedVector(N->getOperand(1)); return DAG.getNode(N->getOpcode(), N->getDebugLoc(), WidenVT, InOp1, InOp2); @@ -1169,12 +1218,12 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { SDValue InOp = N->getOperand(0); DebugLoc dl = N->getDebugLoc(); - MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); unsigned WidenNumElts = WidenVT.getVectorNumElements(); - MVT InVT = InOp.getValueType(); - MVT InEltVT = InVT.getVectorElementType(); - MVT InWidenVT = MVT::getVectorVT(InEltVT, WidenNumElts); + EVT InVT = InOp.getValueType(); + EVT InEltVT = InVT.getVectorElementType(); + EVT InWidenVT = EVT::getVectorVT(*DAG.getContext(), InEltVT, WidenNumElts); unsigned Opcode = N->getOpcode(); unsigned InVTNumElts = InVT.getVectorNumElements(); @@ -1216,7 +1265,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { // Otherwise unroll into some nasty scalar code and rebuild the vector. 
SmallVector<SDValue, 16> Ops(WidenNumElts); - MVT EltVT = WidenVT.getVectorElementType(); + EVT EltVT = WidenVT.getVectorElementType(); unsigned MinElts = std::min(InVTNumElts, WidenNumElts); unsigned i; for (i=0; i < MinElts; ++i) @@ -1232,16 +1281,16 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { } SDValue DAGTypeLegalizer::WidenVecRes_Shift(SDNode *N) { - MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue InOp = GetWidenedVector(N->getOperand(0)); SDValue ShOp = N->getOperand(1); - MVT ShVT = ShOp.getValueType(); + EVT ShVT = ShOp.getValueType(); if (getTypeAction(ShVT) == WidenVector) { ShOp = GetWidenedVector(ShOp); ShVT = ShOp.getValueType(); } - MVT ShWidenVT = MVT::getVectorVT(ShVT.getVectorElementType(), + EVT ShWidenVT = EVT::getVectorVT(*DAG.getContext(), ShVT.getVectorElementType(), WidenVT.getVectorNumElements()); if (ShVT != ShWidenVT) ShOp = ModifyToType(ShOp, ShWidenVT); @@ -1251,16 +1300,16 @@ SDValue DAGTypeLegalizer::WidenVecRes_Shift(SDNode *N) { SDValue DAGTypeLegalizer::WidenVecRes_Unary(SDNode *N) { // Unary op widening. - MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue InOp = GetWidenedVector(N->getOperand(0)); return DAG.getNode(N->getOpcode(), N->getDebugLoc(), WidenVT, InOp); } SDValue DAGTypeLegalizer::WidenVecRes_BIT_CONVERT(SDNode *N) { SDValue InOp = N->getOperand(0); - MVT InVT = InOp.getValueType(); - MVT VT = N->getValueType(0); - MVT WidenVT = TLI.getTypeToTransformTo(VT); + EVT InVT = InOp.getValueType(); + EVT VT = N->getValueType(0); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); DebugLoc dl = N->getDebugLoc(); switch (getTypeAction(InVT)) { @@ -1300,13 +1349,13 @@ SDValue DAGTypeLegalizer::WidenVecRes_BIT_CONVERT(SDNode *N) { // Determine new input vector type. 
The new input vector type will use // the same element type (if its a vector) or use the input type as a // vector. It is the same size as the type to widen to. - MVT NewInVT; + EVT NewInVT; unsigned NewNumElts = WidenSize / InSize; if (InVT.isVector()) { - MVT InEltVT = InVT.getVectorElementType(); - NewInVT= MVT::getVectorVT(InEltVT, WidenSize / InEltVT.getSizeInBits()); + EVT InEltVT = InVT.getVectorElementType(); + NewInVT= EVT::getVectorVT(*DAG.getContext(), InEltVT, WidenSize / InEltVT.getSizeInBits()); } else { - NewInVT = MVT::getVectorVT(InVT, NewNumElts); + NewInVT = EVT::getVectorVT(*DAG.getContext(), InVT, NewNumElts); } if (TLI.isTypeLegal(NewInVT)) { @@ -1332,28 +1381,17 @@ SDValue DAGTypeLegalizer::WidenVecRes_BIT_CONVERT(SDNode *N) { } } - // This should occur rarely. Lower the bit-convert to a store/load - // from the stack. Create the stack frame object. Make sure it is aligned - // for both the source and destination types. - SDValue FIPtr = DAG.CreateStackTemporary(InVT, WidenVT); - int FI = cast<FrameIndexSDNode>(FIPtr.getNode())->getIndex(); - const Value *SV = PseudoSourceValue::getFixedStack(FI); - - // Emit a store to the stack slot. - SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, FIPtr, SV, 0); - - // Result is a load from the stack slot. - return DAG.getLoad(WidenVT, dl, Store, FIPtr, SV, 0); + return CreateStackStoreLoad(InOp, WidenVT); } SDValue DAGTypeLegalizer::WidenVecRes_BUILD_VECTOR(SDNode *N) { DebugLoc dl = N->getDebugLoc(); // Build a vector with undefined for the new nodes. 
- MVT VT = N->getValueType(0); - MVT EltVT = VT.getVectorElementType(); + EVT VT = N->getValueType(0); + EVT EltVT = VT.getVectorElementType(); unsigned NumElts = VT.getVectorNumElements(); - MVT WidenVT = TLI.getTypeToTransformTo(VT); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); unsigned WidenNumElts = WidenVT.getVectorNumElements(); SmallVector<SDValue, 16> NewOps(N->op_begin(), N->op_end()); @@ -1365,8 +1403,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_BUILD_VECTOR(SDNode *N) { } SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) { - MVT InVT = N->getOperand(0).getValueType(); - MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT InVT = N->getOperand(0).getValueType(); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); DebugLoc dl = N->getDebugLoc(); unsigned WidenNumElts = WidenVT.getVectorNumElements(); unsigned NumOperands = N->getNumOperands(); @@ -1387,7 +1425,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) { } } else { InputWidened = true; - if (WidenVT == TLI.getTypeToTransformTo(InVT)) { + if (WidenVT == TLI.getTypeToTransformTo(*DAG.getContext(), InVT)) { // The inputs and the result are widen to the same value. unsigned i; for (i=1; i < NumOperands; ++i) @@ -1406,7 +1444,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) { MaskOps[i] = i; MaskOps[i+WidenNumElts/2] = i+WidenNumElts; } - return DAG.getVectorShuffle(WidenVT, dl, + return DAG.getVectorShuffle(WidenVT, dl, GetWidenedVector(N->getOperand(0)), GetWidenedVector(N->getOperand(1)), &MaskOps[0]); @@ -1415,7 +1453,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) { } // Fall back to use extracts and build vector. 
- MVT EltVT = WidenVT.getVectorElementType(); + EVT EltVT = WidenVT.getVectorElementType(); unsigned NumInElts = InVT.getVectorNumElements(); SmallVector<SDValue, 16> Ops(WidenNumElts); unsigned Idx = 0; @@ -1439,12 +1477,12 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) { SDValue RndOp = N->getOperand(3); SDValue SatOp = N->getOperand(4); - MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); unsigned WidenNumElts = WidenVT.getVectorNumElements(); - MVT InVT = InOp.getValueType(); - MVT InEltVT = InVT.getVectorElementType(); - MVT InWidenVT = MVT::getVectorVT(InEltVT, WidenNumElts); + EVT InVT = InOp.getValueType(); + EVT InEltVT = InVT.getVectorElementType(); + EVT InWidenVT = EVT::getVectorVT(*DAG.getContext(), InEltVT, WidenNumElts); SDValue DTyOp = DAG.getValueType(WidenVT); SDValue STyOp = DAG.getValueType(InWidenVT); @@ -1491,7 +1529,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) { // Otherwise unroll into some nasty scalar code and rebuild the vector. 
SmallVector<SDValue, 16> Ops(WidenNumElts); - MVT EltVT = WidenVT.getVectorElementType(); + EVT EltVT = WidenVT.getVectorElementType(); DTyOp = DAG.getValueType(EltVT); STyOp = DAG.getValueType(InEltVT); @@ -1512,8 +1550,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) { } SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) { - MVT VT = N->getValueType(0); - MVT WidenVT = TLI.getTypeToTransformTo(VT); + EVT VT = N->getValueType(0); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); unsigned WidenNumElts = WidenVT.getVectorNumElements(); SDValue InOp = N->getOperand(0); SDValue Idx = N->getOperand(1); @@ -1522,7 +1560,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) { if (getTypeAction(InOp.getValueType()) == WidenVector) InOp = GetWidenedVector(InOp); - MVT InVT = InOp.getValueType(); + EVT InVT = InOp.getValueType(); ConstantSDNode *CIdx = dyn_cast<ConstantSDNode>(Idx); if (CIdx) { @@ -1540,8 +1578,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) { // We could try widening the input to the right length but for now, extract // the original elements, fill the rest with undefs and build a vector. 
SmallVector<SDValue, 16> Ops(WidenNumElts); - MVT EltVT = VT.getVectorElementType(); - MVT IdxVT = Idx.getValueType(); + EVT EltVT = VT.getVectorElementType(); + EVT IdxVT = Idx.getValueType(); unsigned NumElts = VT.getVectorNumElements(); unsigned i; if (CIdx) { @@ -1573,8 +1611,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) { SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) { LoadSDNode *LD = cast<LoadSDNode>(N); - MVT WidenVT = TLI.getTypeToTransformTo(LD->getValueType(0)); - MVT LdVT = LD->getMemoryVT(); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0)); + EVT LdVT = LD->getMemoryVT(); DebugLoc dl = N->getDebugLoc(); assert(LdVT.isVector() && WidenVT.isVector()); @@ -1593,8 +1631,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) { // For extension loads, we can not play the tricks of chopping legal // vector types and bit cast it to the right type. Instead, we unroll // the load and build a vector. - MVT EltVT = WidenVT.getVectorElementType(); - MVT LdEltVT = LdVT.getVectorElementType(); + EVT EltVT = WidenVT.getVectorElementType(); + EVT LdEltVT = LdVT.getVectorElementType(); unsigned NumElts = LdVT.getVectorNumElements(); // Load each element and widen @@ -1638,26 +1676,26 @@ SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) { // Modified the chain - switch anything that used the old chain to use // the new one. 
- ReplaceValueWith(SDValue(N, 1), Chain); + ReplaceValueWith(SDValue(N, 1), NewChain); return Result; } SDValue DAGTypeLegalizer::WidenVecRes_SCALAR_TO_VECTOR(SDNode *N) { - MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); return DAG.getNode(ISD::SCALAR_TO_VECTOR, N->getDebugLoc(), WidenVT, N->getOperand(0)); } SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) { - MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); unsigned WidenNumElts = WidenVT.getVectorNumElements(); SDValue Cond1 = N->getOperand(0); - MVT CondVT = Cond1.getValueType(); + EVT CondVT = Cond1.getValueType(); if (CondVT.isVector()) { - MVT CondEltVT = CondVT.getVectorElementType(); - MVT CondWidenVT = MVT::getVectorVT(CondEltVT, WidenNumElts); + EVT CondEltVT = CondVT.getVectorElementType(); + EVT CondWidenVT = EVT::getVectorVT(*DAG.getContext(), CondEltVT, WidenNumElts); if (getTypeAction(CondVT) == WidenVector) Cond1 = GetWidenedVector(Cond1); @@ -1681,15 +1719,15 @@ SDValue DAGTypeLegalizer::WidenVecRes_SELECT_CC(SDNode *N) { } SDValue DAGTypeLegalizer::WidenVecRes_UNDEF(SDNode *N) { - MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); return DAG.getUNDEF(WidenVT); } SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N) { - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); DebugLoc dl = N->getDebugLoc(); - MVT WidenVT = TLI.getTypeToTransformTo(VT); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); unsigned NumElts = VT.getVectorNumElements(); unsigned WidenNumElts = WidenVT.getVectorNumElements(); @@ -1711,13 +1749,13 @@ SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N) { } SDValue DAGTypeLegalizer::WidenVecRes_VSETCC(SDNode *N) { - MVT WidenVT = 
TLI.getTypeToTransformTo(N->getValueType(0)); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); unsigned WidenNumElts = WidenVT.getVectorNumElements(); SDValue InOp1 = N->getOperand(0); - MVT InVT = InOp1.getValueType(); + EVT InVT = InOp1.getValueType(); assert(InVT.isVector() && "can not widen non vector type"); - MVT WidenInVT = MVT::getVectorVT(InVT.getVectorElementType(), WidenNumElts); + EVT WidenInVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), WidenNumElts); InOp1 = GetWidenedVector(InOp1); SDValue InOp2 = GetWidenedVector(N->getOperand(1)); @@ -1735,18 +1773,19 @@ SDValue DAGTypeLegalizer::WidenVecRes_VSETCC(SDNode *N) { // Widen Vector Operand //===----------------------------------------------------------------------===// bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned ResNo) { - DEBUG(cerr << "Widen node operand " << ResNo << ": "; N->dump(&DAG); - cerr << "\n"); + DEBUG(errs() << "Widen node operand " << ResNo << ": "; + N->dump(&DAG); + errs() << "\n"); SDValue Res = SDValue(); switch (N->getOpcode()) { default: #ifndef NDEBUG - cerr << "WidenVectorOperand op #" << ResNo << ": "; - N->dump(&DAG); cerr << "\n"; + errs() << "WidenVectorOperand op #" << ResNo << ": "; + N->dump(&DAG); + errs() << "\n"; #endif - assert(0 && "Do not know how to widen this operator's operand!"); - abort(); + llvm_unreachable("Do not know how to widen this operator's operand!"); case ISD::BIT_CONVERT: Res = WidenVecOp_BIT_CONVERT(N); break; case ISD::CONCAT_VECTORS: Res = WidenVecOp_CONCAT_VECTORS(N); break; @@ -1757,8 +1796,13 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned ResNo) { case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: case ISD::TRUNCATE: - case ISD::UINT_TO_FP: Res = WidenVecOp_Convert(N); break; + case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: + case ISD::ANY_EXTEND: + Res = WidenVecOp_Convert(N); + break; } // If Res is null, the 
sub-method took care of registering the result. @@ -1781,15 +1825,15 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) { // Since the result is legal and the input is illegal, it is unlikely // that we can fix the input to a legal type so unroll the convert // into some scalar code and create a nasty build vector. - MVT VT = N->getValueType(0); - MVT EltVT = VT.getVectorElementType(); + EVT VT = N->getValueType(0); + EVT EltVT = VT.getVectorElementType(); DebugLoc dl = N->getDebugLoc(); unsigned NumElts = VT.getVectorNumElements(); SDValue InOp = N->getOperand(0); if (getTypeAction(InOp.getValueType()) == WidenVector) InOp = GetWidenedVector(InOp); - MVT InVT = InOp.getValueType(); - MVT InEltVT = InVT.getVectorElementType(); + EVT InVT = InOp.getValueType(); + EVT InEltVT = InVT.getVectorElementType(); unsigned Opcode = N->getOpcode(); SmallVector<SDValue, 16> Ops(NumElts); @@ -1802,9 +1846,9 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) { } SDValue DAGTypeLegalizer::WidenVecOp_BIT_CONVERT(SDNode *N) { - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); SDValue InOp = GetWidenedVector(N->getOperand(0)); - MVT InWidenVT = InOp.getValueType(); + EVT InWidenVT = InOp.getValueType(); DebugLoc dl = N->getDebugLoc(); // Check if we can convert between two legal vector types and extract. @@ -1812,7 +1856,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_BIT_CONVERT(SDNode *N) { unsigned Size = VT.getSizeInBits(); if (InWidenSize % Size == 0 && !VT.isVector()) { unsigned NewNumElts = InWidenSize / Size; - MVT NewVT = MVT::getVectorVT(VT, NewNumElts); + EVT NewVT = EVT::getVectorVT(*DAG.getContext(), VT, NewNumElts); if (TLI.isTypeLegal(NewVT)) { SDValue BitOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVT, InOp); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, BitOp, @@ -1820,31 +1864,20 @@ SDValue DAGTypeLegalizer::WidenVecOp_BIT_CONVERT(SDNode *N) { } } - // Lower the bit-convert to a store/load from the stack. Create the stack - // frame object. 
Make sure it is aligned for both the source and destination - // types. - SDValue FIPtr = DAG.CreateStackTemporary(InWidenVT, VT); - int FI = cast<FrameIndexSDNode>(FIPtr.getNode())->getIndex(); - const Value *SV = PseudoSourceValue::getFixedStack(FI); - - // Emit a store to the stack slot. - SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, FIPtr, SV, 0); - - // Result is a load from the stack slot. - return DAG.getLoad(VT, dl, Store, FIPtr, SV, 0); + return CreateStackStoreLoad(InOp, VT); } SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) { // If the input vector is not legal, it is likely that we will not find a // legal vector of the same size. Replace the concatenate vector with a // nasty build vector. - MVT VT = N->getValueType(0); - MVT EltVT = VT.getVectorElementType(); + EVT VT = N->getValueType(0); + EVT EltVT = VT.getVectorElementType(); DebugLoc dl = N->getDebugLoc(); unsigned NumElts = VT.getVectorNumElements(); SmallVector<SDValue, 16> Ops(NumElts); - MVT InVT = N->getOperand(0).getValueType(); + EVT InVT = N->getOperand(0).getValueType(); unsigned NumInElts = InVT.getVectorNumElements(); unsigned Idx = 0; @@ -1862,9 +1895,8 @@ SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) { SDValue DAGTypeLegalizer::WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { SDValue InOp = GetWidenedVector(N->getOperand(0)); - MVT EltVT = InOp.getValueType().getVectorElementType(); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getDebugLoc(), - EltVT, InOp, N->getOperand(1)); + N->getValueType(0), InOp, N->getOperand(1)); } SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) { @@ -1880,8 +1912,8 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) { SDValue ValOp = GetWidenedVector(ST->getValue()); DebugLoc dl = N->getDebugLoc(); - MVT StVT = ST->getMemoryVT(); - MVT ValVT = ValOp.getValueType(); + EVT StVT = ST->getMemoryVT(); + EVT ValVT = ValOp.getValueType(); // It must be true that we the widen vector type is bigger than where // we 
need to store. assert(StVT.isVector() && ValOp.getValueType().isVector()); @@ -1892,8 +1924,8 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) { // For truncating stores, we can not play the tricks of chopping legal // vector types and bit cast it to the right type. Instead, we unroll // the store. - MVT StEltVT = StVT.getVectorElementType(); - MVT ValEltVT = ValVT.getVectorElementType(); + EVT StEltVT = StVT.getVectorElementType(); + EVT ValEltVT = ValVT.getVectorElementType(); unsigned Increment = ValEltVT.getSizeInBits() / 8; unsigned NumElts = StVT.getVectorNumElements(); SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp, @@ -1938,9 +1970,10 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) { // VecVT: Vector value type whose size we must match. // Returns NewVecVT and NewEltVT - the vector type and its associated // element type. -static void FindAssocWidenVecType(const TargetLowering &TLI, unsigned Width, - MVT VecVT, - MVT& NewEltVT, MVT& NewVecVT) { +static void FindAssocWidenVecType(SelectionDAG& DAG, + const TargetLowering &TLI, unsigned Width, + EVT VecVT, + EVT& NewEltVT, EVT& NewVecVT) { unsigned EltWidth = Width + 1; if (TLI.isTypeLegal(VecVT)) { // We start with the preferred with, making it a power of 2 and find a @@ -1950,9 +1983,9 @@ static void FindAssocWidenVecType(const TargetLowering &TLI, unsigned Width, do { assert(EltWidth > 0); EltWidth = 1 << Log2_32(EltWidth - 1); - NewEltVT = MVT::getIntegerVT(EltWidth); + NewEltVT = EVT::getIntegerVT(*DAG.getContext(), EltWidth); unsigned NumElts = VecVT.getSizeInBits() / EltWidth; - NewVecVT = MVT::getVectorVT(NewEltVT, NumElts); + NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewEltVT, NumElts); } while (!TLI.isTypeLegal(NewVecVT) || VecVT.getSizeInBits() != NewVecVT.getSizeInBits()); } else { @@ -1965,9 +1998,9 @@ static void FindAssocWidenVecType(const TargetLowering &TLI, unsigned Width, do { assert(EltWidth > 0); EltWidth = 1 << Log2_32(EltWidth - 1); - 
NewEltVT = MVT::getIntegerVT(EltWidth); + NewEltVT = EVT::getIntegerVT(*DAG.getContext(), EltWidth); unsigned NumElts = VecVT.getSizeInBits() / EltWidth; - NewVecVT = MVT::getVectorVT(NewEltVT, NumElts); + NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewEltVT, NumElts); } while (!TLI.isTypeLegal(NewEltVT) || VecVT.getSizeInBits() != NewVecVT.getSizeInBits()); } @@ -1981,7 +2014,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain, unsigned Alignment, bool isVolatile, unsigned LdWidth, - MVT ResType, + EVT ResType, DebugLoc dl) { // The strategy assumes that we can efficiently load powers of two widths. // The routines chops the vector into the largest power of 2 load and @@ -1992,9 +2025,9 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain, // the load is nonvolatile, we an use a wider load for the value. // Find the vector type that can load from. - MVT NewEltVT, NewVecVT; + EVT NewEltVT, NewVecVT; unsigned NewEltVTWidth; - FindAssocWidenVecType(TLI, LdWidth, ResType, NewEltVT, NewVecVT); + FindAssocWidenVecType(DAG, TLI, LdWidth, ResType, NewEltVT, NewVecVT); NewEltVTWidth = NewEltVT.getSizeInBits(); SDValue LdOp = DAG.getLoad(NewEltVT, dl, Chain, BasePtr, SV, SVOffset, @@ -2021,7 +2054,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain, // Our current type we are using is too large, use a smaller size by // using a smaller power of 2 unsigned oNewEltVTWidth = NewEltVTWidth; - FindAssocWidenVecType(TLI, LdWidth, ResType, NewEltVT, NewVecVT); + FindAssocWidenVecType(DAG, TLI, LdWidth, ResType, NewEltVT, NewVecVT); NewEltVTWidth = NewEltVT.getSizeInBits(); // Readjust position and vector position based on new load type Idx = Idx * (oNewEltVTWidth/NewEltVTWidth); @@ -2056,10 +2089,10 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain, // want to store. This avoids requiring a stack convert. 
// Find a width of the element type we can store with - MVT WidenVT = ValOp.getValueType(); - MVT NewEltVT, NewVecVT; + EVT WidenVT = ValOp.getValueType(); + EVT NewEltVT, NewVecVT; - FindAssocWidenVecType(TLI, StWidth, WidenVT, NewEltVT, NewVecVT); + FindAssocWidenVecType(DAG, TLI, StWidth, WidenVT, NewEltVT, NewVecVT); unsigned NewEltVTWidth = NewEltVT.getSizeInBits(); SDValue VecOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVecVT, ValOp); @@ -2088,7 +2121,7 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain, // Our current type we are using is too large, use a smaller size by // using a smaller power of 2 unsigned oNewEltVTWidth = NewEltVTWidth; - FindAssocWidenVecType(TLI, StWidth, WidenVT, NewEltVT, NewVecVT); + FindAssocWidenVecType(DAG, TLI, StWidth, WidenVT, NewEltVT, NewVecVT); NewEltVTWidth = NewEltVT.getSizeInBits(); // Readjust position and vector position based on new load type Idx = Idx * (oNewEltVTWidth/NewEltVTWidth); @@ -2106,10 +2139,10 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain, /// Modifies a vector input (widen or narrows) to a vector of NVT. The /// input vector must have the same element type as NVT. -SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, MVT NVT) { +SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT) { // Note that InOp might have been widened so it might already have // the right width or it might need be narrowed. - MVT InVT = InOp.getValueType(); + EVT InVT = InOp.getValueType(); assert(InVT.getVectorElementType() == NVT.getVectorElementType() && "input and widen element type must match"); DebugLoc dl = InOp.getDebugLoc(); @@ -2137,7 +2170,7 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, MVT NVT) { // Fall back to extract and build. 
SmallVector<SDValue, 16> Ops(WidenNumElts); - MVT EltVT = NVT.getVectorElementType(); + EVT EltVT = NVT.getVectorElementType(); unsigned MinNumElts = std::min(WidenNumElts, InNumElts); unsigned Idx; for (Idx = 0; Idx < MinNumElts; ++Idx) diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index af73b28..e0f93d8 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -24,6 +24,8 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; STATISTIC(NumUnfolds, "Number of nodes unfolded"); @@ -108,14 +110,14 @@ private: /// Schedule - Schedule the DAG using list scheduling. void ScheduleDAGFast::Schedule() { - DOUT << "********** List Scheduling **********\n"; + DEBUG(errs() << "********** List Scheduling **********\n"); NumLiveRegs = 0; LiveRegDefs.resize(TRI->getNumRegs(), NULL); LiveRegCycles.resize(TRI->getNumRegs(), 0); // Build the scheduling graph. - BuildSchedGraph(); + BuildSchedGraph(NULL); DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) SUnits[su].dumpAll(this)); @@ -132,17 +134,17 @@ void ScheduleDAGFast::Schedule() { /// the AvailableQueue if the count reaches zero. Also update its cycle bound. void ScheduleDAGFast::ReleasePred(SUnit *SU, SDep *PredEdge) { SUnit *PredSU = PredEdge->getSUnit(); - --PredSU->NumSuccsLeft; - + #ifndef NDEBUG - if (PredSU->NumSuccsLeft < 0) { - cerr << "*** Scheduling failed! ***\n"; + if (PredSU->NumSuccsLeft == 0) { + errs() << "*** Scheduling failed! ***\n"; PredSU->dump(this); - cerr << " has been released too many times!\n"; - assert(0); + errs() << " has been released too many times!\n"; + llvm_unreachable(0); } #endif - + --PredSU->NumSuccsLeft; + // If all the node's successors are scheduled, this node is ready // to be scheduled. 
Ignore the special EntrySU node. if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) { @@ -174,7 +176,7 @@ void ScheduleDAGFast::ReleasePredecessors(SUnit *SU, unsigned CurCycle) { /// count of its predecessors. If a predecessor pending count is zero, add it to /// the Available queue. void ScheduleDAGFast::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) { - DOUT << "*** Scheduling [" << CurCycle << "]: "; + DEBUG(errs() << "*** Scheduling [" << CurCycle << "]: "); DEBUG(SU->dump(this)); assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!"); @@ -214,7 +216,7 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { SUnit *NewSU; bool TryUnfold = false; for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { - MVT VT = N->getValueType(i); + EVT VT = N->getValueType(i); if (VT == MVT::Flag) return NULL; else if (VT == MVT::Other) @@ -222,7 +224,7 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { } for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { const SDValue &Op = N->getOperand(i); - MVT VT = Op.getNode()->getValueType(Op.getResNo()); + EVT VT = Op.getNode()->getValueType(Op.getResNo()); if (VT == MVT::Flag) return NULL; } @@ -232,7 +234,7 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes)) return NULL; - DOUT << "Unfolding SU # " << SU->NodeNum << "\n"; + DEBUG(errs() << "Unfolding SU # " << SU->NodeNum << "\n"); assert(NewNodes.size() == 2 && "Expected a load folding node!"); N = NewNodes[1]; @@ -342,7 +344,7 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { SU = NewSU; } - DOUT << "Duplicating SU # " << SU->NodeNum << "\n"; + DEBUG(errs() << "Duplicating SU # " << SU->NodeNum << "\n"); NewSU = Clone(SU); // New SUnit has the exact same predecessors. 
@@ -419,7 +421,7 @@ void ScheduleDAGFast::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, /// getPhysicalRegisterVT - Returns the ValueType of the physical register /// definition of the specified node. /// FIXME: Move to SelectionDAG? -static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, +static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, const TargetInstrInfo *TII) { const TargetInstrDesc &TID = TII->get(N->getMachineOpcode()); assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!"); @@ -533,7 +535,7 @@ void ScheduleDAGFast::ListScheduleBottomUp() { assert(LRegs.size() == 1 && "Can't handle this yet!"); unsigned Reg = LRegs[0]; SUnit *LRDef = LiveRegDefs[Reg]; - MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII); + EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII); const TargetRegisterClass *RC = TRI->getPhysicalRegisterRegClass(Reg, VT); const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC); @@ -549,16 +551,16 @@ void ScheduleDAGFast::ListScheduleBottomUp() { // Issue copies, these can be expensive cross register class copies. 
SmallVector<SUnit*, 2> Copies; InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies); - DOUT << "Adding an edge from SU # " << TrySU->NodeNum - << " to SU #" << Copies.front()->NodeNum << "\n"; + DEBUG(errs() << "Adding an edge from SU # " << TrySU->NodeNum + << " to SU #" << Copies.front()->NodeNum << "\n"); AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1, /*Reg=*/0, /*isNormalMemory=*/false, /*isMustAlias=*/false, /*isArtificial=*/true)); NewDef = Copies.back(); } - DOUT << "Adding an edge from SU # " << NewDef->NodeNum - << " to SU #" << TrySU->NodeNum << "\n"; + DEBUG(errs() << "Adding an edge from SU # " << NewDef->NodeNum + << " to SU #" << TrySU->NodeNum << "\n"); LiveRegDefs[Reg] = NewDef; AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1, /*Reg=*/0, /*isNormalMemory=*/false, @@ -568,8 +570,7 @@ void ScheduleDAGFast::ListScheduleBottomUp() { } if (!CurSU) { - assert(false && "Unable to resolve live physical register dependencies!"); - abort(); + llvm_unreachable("Unable to resolve live physical register dependencies!"); } } @@ -587,41 +588,11 @@ void ScheduleDAGFast::ListScheduleBottomUp() { ++CurCycle; } - // Reverse the order if it is bottom up. + // Reverse the order since it is bottom up. std::reverse(Sequence.begin(), Sequence.end()); - - + #ifndef NDEBUG - // Verify that all SUnits were scheduled. - bool AnyNotSched = false; - unsigned DeadNodes = 0; - unsigned Noops = 0; - for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { - if (!SUnits[i].isScheduled) { - if (SUnits[i].NumPreds == 0 && SUnits[i].NumSuccs == 0) { - ++DeadNodes; - continue; - } - if (!AnyNotSched) - cerr << "*** List scheduling failed! ***\n"; - SUnits[i].dump(this); - cerr << "has not been scheduled!\n"; - AnyNotSched = true; - } - if (SUnits[i].NumSuccsLeft != 0) { - if (!AnyNotSched) - cerr << "*** List scheduling failed! 
***\n"; - SUnits[i].dump(this); - cerr << "has successors left!\n"; - AnyNotSched = true; - } - } - for (unsigned i = 0, e = Sequence.size(); i != e; ++i) - if (!Sequence[i]) - ++Noops; - assert(!AnyNotSched); - assert(Sequence.size() + DeadNodes - Noops == SUnits.size() && - "The number of nodes scheduled doesn't match the expected number!"); + VerifySchedule(/*isBottomUp=*/true); #endif } diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp index c432534..c8d2158 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp @@ -29,6 +29,8 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/ADT/PriorityQueue.h" #include "llvm/ADT/Statistic.h" #include <climits> @@ -86,10 +88,10 @@ private: /// Schedule - Schedule the DAG using list scheduling. void ScheduleDAGList::Schedule() { - DOUT << "********** List Scheduling **********\n"; + DEBUG(errs() << "********** List Scheduling **********\n"); // Build the scheduling graph. - BuildSchedGraph(); + BuildSchedGraph(NULL); AvailableQueue->initNodes(SUnits); @@ -106,17 +108,17 @@ void ScheduleDAGList::Schedule() { /// the PendingQueue if the count reaches zero. Also update its cycle bound. void ScheduleDAGList::ReleaseSucc(SUnit *SU, const SDep &D) { SUnit *SuccSU = D.getSUnit(); - --SuccSU->NumPredsLeft; - + #ifndef NDEBUG - if (SuccSU->NumPredsLeft < 0) { - cerr << "*** Scheduling failed! ***\n"; + if (SuccSU->NumPredsLeft == 0) { + errs() << "*** Scheduling failed! 
***\n"; SuccSU->dump(this); - cerr << " has been released too many times!\n"; - assert(0); + errs() << " has been released too many times!\n"; + llvm_unreachable(0); } #endif - + --SuccSU->NumPredsLeft; + SuccSU->setDepthToAtLeast(SU->getDepth() + D.getLatency()); // If all the node's predecessors are scheduled, this node is ready @@ -140,7 +142,7 @@ void ScheduleDAGList::ReleaseSuccessors(SUnit *SU) { /// count of its successors. If a successor pending count is zero, add it to /// the Available queue. void ScheduleDAGList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { - DOUT << "*** Scheduling [" << CurCycle << "]: "; + DEBUG(errs() << "*** Scheduling [" << CurCycle << "]: "); DEBUG(SU->dump(this)); Sequence.push_back(SU); @@ -232,7 +234,7 @@ void ScheduleDAGList::ListScheduleTopDown() { } else if (!HasNoopHazards) { // Otherwise, we have a pipeline stall, but no other problem, just advance // the current cycle and try again. - DOUT << "*** Advancing cycle, no work to do\n"; + DEBUG(errs() << "*** Advancing cycle, no work to do\n"); HazardRec->AdvanceCycle(); ++NumStalls; ++CurCycle; @@ -240,7 +242,7 @@ void ScheduleDAGList::ListScheduleTopDown() { // Otherwise, we have no instructions to issue and we have instructions // that will fault if we don't do this right. This is the case for // processors without pipeline interlocks and other cases. 
- DOUT << "*** Emitting noop\n"; + DEBUG(errs() << "*** Emitting noop\n"); HazardRec->EmitNoop(); Sequence.push_back(0); // NULL here means noop ++NumNoops; diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index c97e2a8..cec24e6 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -25,10 +25,12 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/ADT/PriorityQueue.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/Support/raw_ostream.h" #include <climits> using namespace llvm; @@ -163,14 +165,14 @@ private: /// Schedule - Schedule the DAG using list scheduling. void ScheduleDAGRRList::Schedule() { - DOUT << "********** List Scheduling **********\n"; + DEBUG(errs() << "********** List Scheduling **********\n"); NumLiveRegs = 0; LiveRegDefs.resize(TRI->getNumRegs(), NULL); LiveRegCycles.resize(TRI->getNumRegs(), 0); // Build the scheduling graph. - BuildSchedGraph(); + BuildSchedGraph(NULL); DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) SUnits[su].dumpAll(this)); @@ -195,17 +197,17 @@ void ScheduleDAGRRList::Schedule() { /// the AvailableQueue if the count reaches zero. Also update its cycle bound. void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) { SUnit *PredSU = PredEdge->getSUnit(); - --PredSU->NumSuccsLeft; - + #ifndef NDEBUG - if (PredSU->NumSuccsLeft < 0) { - cerr << "*** Scheduling failed! ***\n"; + if (PredSU->NumSuccsLeft == 0) { + errs() << "*** Scheduling failed! 
***\n"; PredSU->dump(this); - cerr << " has been released too many times!\n"; - assert(0); + errs() << " has been released too many times!\n"; + llvm_unreachable(0); } #endif - + --PredSU->NumSuccsLeft; + // If all the node's successors are scheduled, this node is ready // to be scheduled. Ignore the special EntrySU node. if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) { @@ -237,7 +239,7 @@ void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU, unsigned CurCycle) { /// count of its predecessors. If a predecessor pending count is zero, add it to /// the Available queue. void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) { - DOUT << "*** Scheduling [" << CurCycle << "]: "; + DEBUG(errs() << "*** Scheduling [" << CurCycle << "]: "); DEBUG(SU->dump(this)); assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!"); @@ -276,13 +278,14 @@ void ScheduleDAGRRList::CapturePred(SDep *PredEdge) { AvailableQueue->remove(PredSU); } + assert(PredSU->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!"); ++PredSU->NumSuccsLeft; } /// UnscheduleNodeBottomUp - Remove the node from the schedule, update its and /// its predecessor states to reflect the change. 
void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { - DOUT << "*** Unscheduling [" << SU->getHeight() << "]: "; + DEBUG(errs() << "*** Unscheduling [" << SU->getHeight() << "]: "); DEBUG(SU->dump(this)); AvailableQueue->UnscheduledNode(SU); @@ -351,7 +354,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { SUnit *NewSU; bool TryUnfold = false; for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { - MVT VT = N->getValueType(i); + EVT VT = N->getValueType(i); if (VT == MVT::Flag) return NULL; else if (VT == MVT::Other) @@ -359,7 +362,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { } for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { const SDValue &Op = N->getOperand(i); - MVT VT = Op.getNode()->getValueType(Op.getResNo()); + EVT VT = Op.getNode()->getValueType(Op.getResNo()); if (VT == MVT::Flag) return NULL; } @@ -369,7 +372,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes)) return NULL; - DOUT << "Unfolding SU # " << SU->NodeNum << "\n"; + DEBUG(errs() << "Unfolding SU # " << SU->NodeNum << "\n"); assert(NewNodes.size() == 2 && "Expected a load folding node!"); N = NewNodes[1]; @@ -488,7 +491,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { SU = NewSU; } - DOUT << "Duplicating SU # " << SU->NodeNum << "\n"; + DEBUG(errs() << "Duplicating SU # " << SU->NodeNum << "\n"); NewSU = CreateClone(SU); // New SUnit has the exact same predecessors. @@ -570,7 +573,7 @@ void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, /// getPhysicalRegisterVT - Returns the ValueType of the physical register /// definition of the specified node. /// FIXME: Move to SelectionDAG? 
-static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, +static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, const TargetInstrInfo *TII) { const TargetInstrDesc &TID = TII->get(N->getMachineOpcode()); assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!"); @@ -753,7 +756,7 @@ void ScheduleDAGRRList::ListScheduleBottomUp() { assert(LRegs.size() == 1 && "Can't handle this yet!"); unsigned Reg = LRegs[0]; SUnit *LRDef = LiveRegDefs[Reg]; - MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII); + EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII); const TargetRegisterClass *RC = TRI->getPhysicalRegisterRegClass(Reg, VT); const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC); @@ -769,8 +772,8 @@ void ScheduleDAGRRList::ListScheduleBottomUp() { // Issue copies, these can be expensive cross register class copies. SmallVector<SUnit*, 2> Copies; InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies); - DOUT << "Adding an edge from SU #" << TrySU->NodeNum - << " to SU #" << Copies.front()->NodeNum << "\n"; + DEBUG(errs() << "Adding an edge from SU #" << TrySU->NodeNum + << " to SU #" << Copies.front()->NodeNum << "\n"); AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1, /*Reg=*/0, /*isNormalMemory=*/false, /*isMustAlias=*/false, @@ -778,8 +781,8 @@ void ScheduleDAGRRList::ListScheduleBottomUp() { NewDef = Copies.back(); } - DOUT << "Adding an edge from SU #" << NewDef->NodeNum - << " to SU #" << TrySU->NodeNum << "\n"; + DEBUG(errs() << "Adding an edge from SU #" << NewDef->NodeNum + << " to SU #" << TrySU->NodeNum << "\n"); LiveRegDefs[Reg] = NewDef; AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1, /*Reg=*/0, /*isNormalMemory=*/false, @@ -822,17 +825,17 @@ void ScheduleDAGRRList::ListScheduleBottomUp() { /// the AvailableQueue if the count reaches zero. Also update its cycle bound. 
void ScheduleDAGRRList::ReleaseSucc(SUnit *SU, const SDep *SuccEdge) { SUnit *SuccSU = SuccEdge->getSUnit(); - --SuccSU->NumPredsLeft; - + #ifndef NDEBUG - if (SuccSU->NumPredsLeft < 0) { - cerr << "*** Scheduling failed! ***\n"; + if (SuccSU->NumPredsLeft == 0) { + errs() << "*** Scheduling failed! ***\n"; SuccSU->dump(this); - cerr << " has been released too many times!\n"; - assert(0); + errs() << " has been released too many times!\n"; + llvm_unreachable(0); } #endif - + --SuccSU->NumPredsLeft; + // If all the node's predecessors are scheduled, this node is ready // to be scheduled. Ignore the special ExitSU node. if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) { @@ -856,7 +859,7 @@ void ScheduleDAGRRList::ReleaseSuccessors(SUnit *SU) { /// count of its successors. If a successor pending count is zero, add it to /// the Available queue. void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { - DOUT << "*** Scheduling [" << CurCycle << "]: "; + DEBUG(errs() << "*** Scheduling [" << CurCycle << "]: "); DEBUG(SU->dump(this)); assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!"); @@ -1215,7 +1218,7 @@ static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU, if (!SUImpDefs) return false; for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) { - MVT VT = N->getValueType(i); + EVT VT = N->getValueType(i); if (VT == MVT::Flag || VT == MVT::Other) continue; if (!N->hasAnyUseOfValue(i)) @@ -1328,9 +1331,9 @@ void RegReductionPriorityQueue<SF>::PrescheduleNodesWithMultipleUses() { // Ok, the transformation is safe and the heuristics suggest it is // profitable. Update the graph. 
- DOUT << "Prescheduling SU # " << SU->NodeNum - << " next to PredSU # " << PredSU->NodeNum - << " to guide scheduling in the presence of multiple uses\n"; + DEBUG(errs() << "Prescheduling SU # " << SU->NodeNum + << " next to PredSU # " << PredSU->NodeNum + << " to guide scheduling in the presence of multiple uses\n"); for (unsigned i = 0; i != PredSU->Succs.size(); ++i) { SDep Edge = PredSU->Succs[i]; assert(!Edge.isAssignedRegDep()); @@ -1418,8 +1421,8 @@ void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() { (hasCopyToRegUse(SU) && !hasCopyToRegUse(SuccSU)) || (!SU->isCommutable && SuccSU->isCommutable)) && !scheduleDAG->IsReachable(SuccSU, SU)) { - DOUT << "Adding a pseudo-two-addr edge from SU # " << SU->NodeNum - << " to SU #" << SuccSU->NodeNum << "\n"; + DEBUG(errs() << "Adding a pseudo-two-addr edge from SU # " + << SU->NodeNum << " to SU #" << SuccSU->NodeNum << "\n"); scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Order, /*Latency=*/0, /*Reg=*/0, /*isNormalMemory=*/false, /*isMustAlias=*/false, diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 7aa15bc..d53de34 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -14,10 +14,12 @@ #define DEBUG_TYPE "pre-RA-sched" #include "ScheduleDAGSDNodes.h" +#include "InstrEmitter.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtarget.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -152,6 +154,11 @@ void ScheduleDAGSDNodes::BuildSchedUnits() { } void ScheduleDAGSDNodes::AddSchedEdges() { + const TargetSubtarget &ST = TM.getSubtarget<TargetSubtarget>(); + + // Check to see if the scheduler cares about latencies. 
+ bool UnitLatencies = ForceUnitLatencies(); + // Pass 2: add the preds, succs, etc. for (unsigned su = 0, e = SUnits.size(); su != e; ++su) { SUnit *SU = &SUnits[su]; @@ -175,7 +182,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() { if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).getImplicitDefs()) { SU->hasPhysRegClobbers = true; - unsigned NumUsed = CountResults(N); + unsigned NumUsed = InstrEmitter::CountResults(N); while (NumUsed != 0 && !N->hasAnyUseOfValue(NumUsed - 1)) --NumUsed; // Skip over unused values at the end. if (NumUsed > TII->get(N->getMachineOpcode()).getNumDefs()) @@ -189,7 +196,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() { assert(OpSU && "Node has no SUnit!"); if (OpSU == SU) continue; // In the same group. - MVT OpVT = N->getOperand(i).getValueType(); + EVT OpVT = N->getOperand(i).getValueType(); assert(OpVT != MVT::Flag && "Flagged nodes should be in same sunit!"); bool isChain = OpVT == MVT::Other; @@ -206,8 +213,15 @@ void ScheduleDAGSDNodes::AddSchedEdges() { // dependency. This may change in the future though. if (Cost >= 0) PhysReg = 0; - SU->addPred(SDep(OpSU, isChain ? SDep::Order : SDep::Data, - OpSU->Latency, PhysReg)); + + const SDep& dep = SDep(OpSU, isChain ? SDep::Order : SDep::Data, + OpSU->Latency, PhysReg); + if (!isChain && !UnitLatencies) { + ComputeOperandLatency(OpSU, SU, (SDep &)dep); + ST.adjustSchedDependency(OpSU, SU, (SDep &)dep); + } + + SU->addPred(dep); } } } @@ -217,7 +231,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() { /// are input. This SUnit graph is similar to the SelectionDAG, but /// excludes nodes that aren't interesting to scheduling, and represents /// flagged together nodes with a single SUnit. -void ScheduleDAGSDNodes::BuildSchedGraph() { +void ScheduleDAGSDNodes::BuildSchedGraph(AliasAnalysis *AA) { // Populate the SUnits array. BuildSchedUnits(); // Compute all the scheduling dependencies between nodes. 
@@ -230,65 +244,68 @@ void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) { // Compute the latency for the node. We use the sum of the latencies for // all nodes flagged together into this SUnit. SU->Latency = 0; - bool SawMachineOpcode = false; for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode()) if (N->isMachineOpcode()) { - SawMachineOpcode = true; - SU->Latency += - InstrItins.getLatency(TII->get(N->getMachineOpcode()).getSchedClass()); + SU->Latency += InstrItins. + getStageLatency(TII->get(N->getMachineOpcode()).getSchedClass()); } } -/// CountResults - The results of target nodes have register or immediate -/// operands first, then an optional chain, and optional flag operands (which do -/// not go into the resulting MachineInstr). -unsigned ScheduleDAGSDNodes::CountResults(SDNode *Node) { - unsigned N = Node->getNumValues(); - while (N && Node->getValueType(N - 1) == MVT::Flag) - --N; - if (N && Node->getValueType(N - 1) == MVT::Other) - --N; // Skip over chain result. - return N; -} - -/// CountOperands - The inputs to target nodes have any actual inputs first, -/// followed by special operands that describe memory references, then an -/// optional chain operand, then an optional flag operand. Compute the number -/// of actual operands that will go into the resulting MachineInstr. -unsigned ScheduleDAGSDNodes::CountOperands(SDNode *Node) { - unsigned N = ComputeMemOperandsEnd(Node); - while (N && isa<MemOperandSDNode>(Node->getOperand(N - 1).getNode())) - --N; // Ignore MEMOPERAND nodes - return N; -} - -/// ComputeMemOperandsEnd - Find the index one past the last MemOperandSDNode -/// operand -unsigned ScheduleDAGSDNodes::ComputeMemOperandsEnd(SDNode *Node) { - unsigned N = Node->getNumOperands(); - while (N && Node->getOperand(N - 1).getValueType() == MVT::Flag) - --N; - if (N && Node->getOperand(N - 1).getValueType() == MVT::Other) - --N; // Ignore chain if it exists. 
- return N; -} - - void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const { if (!SU->getNode()) { - cerr << "PHYS REG COPY\n"; + errs() << "PHYS REG COPY\n"; return; } SU->getNode()->dump(DAG); - cerr << "\n"; + errs() << "\n"; SmallVector<SDNode *, 4> FlaggedNodes; for (SDNode *N = SU->getNode()->getFlaggedNode(); N; N = N->getFlaggedNode()) FlaggedNodes.push_back(N); while (!FlaggedNodes.empty()) { - cerr << " "; + errs() << " "; FlaggedNodes.back()->dump(DAG); - cerr << "\n"; + errs() << "\n"; FlaggedNodes.pop_back(); } } + +/// EmitSchedule - Emit the machine code in scheduled order. +MachineBasicBlock *ScheduleDAGSDNodes:: +EmitSchedule(DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) { + InstrEmitter Emitter(BB, InsertPos); + DenseMap<SDValue, unsigned> VRBaseMap; + DenseMap<SUnit*, unsigned> CopyVRBaseMap; + for (unsigned i = 0, e = Sequence.size(); i != e; i++) { + SUnit *SU = Sequence[i]; + if (!SU) { + // Null SUnit* is a noop. + EmitNoop(); + continue; + } + + // For pre-regalloc scheduling, create instructions corresponding to the + // SDNode and any flagged SDNodes and append them to the block. + if (!SU->getNode()) { + // Emit a copy. 
+ EmitPhysRegCopy(SU, CopyVRBaseMap); + continue; + } + + SmallVector<SDNode *, 4> FlaggedNodes; + for (SDNode *N = SU->getNode()->getFlaggedNode(); N; + N = N->getFlaggedNode()) + FlaggedNodes.push_back(N); + while (!FlaggedNodes.empty()) { + Emitter.EmitNode(FlaggedNodes.back(), SU->OrigNode != SU, SU->isCloned, + VRBaseMap, EM); + FlaggedNodes.pop_back(); + } + Emitter.EmitNode(SU->getNode(), SU->OrigNode != SU, SU->isCloned, + VRBaseMap, EM); + } + + BB = Emitter.getBlock(); + InsertPos = Emitter.getInsertPos(); + return BB; +} diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h index 2a278b7..c9c36f7 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -58,7 +58,6 @@ namespace llvm { if (isa<ConstantPoolSDNode>(Node)) return true; if (isa<JumpTableSDNode>(Node)) return true; if (isa<ExternalSymbolSDNode>(Node)) return true; - if (isa<MemOperandSDNode>(Node)) return true; if (Node->getOpcode() == ISD::EntryToken) return true; return false; } @@ -87,35 +86,14 @@ namespace llvm { /// are input. This SUnit graph is similar to the SelectionDAG, but /// excludes nodes that aren't interesting to scheduling, and represents /// flagged together nodes with a single SUnit. - virtual void BuildSchedGraph(); + virtual void BuildSchedGraph(AliasAnalysis *AA); /// ComputeLatency - Compute node latency. /// virtual void ComputeLatency(SUnit *SU); - /// CountResults - The results of target nodes have register or immediate - /// operands first, then an optional chain, and optional flag operands - /// (which do not go into the machine instrs.) - static unsigned CountResults(SDNode *Node); - - /// CountOperands - The inputs to target nodes have any actual inputs first, - /// followed by special operands that describe memory references, then an - /// optional chain operand, then flag operands. 
Compute the number of - /// actual operands that will go into the resulting MachineInstr. - static unsigned CountOperands(SDNode *Node); - - /// ComputeMemOperandsEnd - Find the index one past the last - /// MemOperandSDNode operand - static unsigned ComputeMemOperandsEnd(SDNode *Node); - - /// EmitNode - Generate machine code for an node and needed dependencies. - /// VRBaseMap contains, for each already emitted node, the first virtual - /// register number for the results of the node. - /// - void EmitNode(SDNode *Node, bool IsClone, bool HasClone, - DenseMap<SDValue, unsigned> &VRBaseMap); - - virtual MachineBasicBlock *EmitSchedule(); + virtual MachineBasicBlock * + EmitSchedule(DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM); /// Schedule - Order nodes according to selected style, filling /// in the Sequence member. @@ -129,47 +107,6 @@ namespace llvm { virtual void getCustomGraphFeatures(GraphWriter<ScheduleDAG*> &GW) const; private: - /// EmitSubregNode - Generate machine code for subreg nodes. - /// - void EmitSubregNode(SDNode *Node, - DenseMap<SDValue, unsigned> &VRBaseMap); - - /// EmitCopyToRegClassNode - Generate machine code for COPY_TO_REGCLASS - /// nodes. - /// - void EmitCopyToRegClassNode(SDNode *Node, - DenseMap<SDValue, unsigned> &VRBaseMap); - - /// getVR - Return the virtual register corresponding to the specified result - /// of the specified node. - unsigned getVR(SDValue Op, DenseMap<SDValue, unsigned> &VRBaseMap); - - /// getDstOfCopyToRegUse - If the only use of the specified result number of - /// node is a CopyToReg, return its destination register. Return 0 otherwise. - unsigned getDstOfOnlyCopyToRegUse(SDNode *Node, unsigned ResNo) const; - - void AddOperand(MachineInstr *MI, SDValue Op, unsigned IIOpNum, - const TargetInstrDesc *II, - DenseMap<SDValue, unsigned> &VRBaseMap); - - /// AddRegisterOperand - Add the specified register as an operand to the - /// specified machine instr. 
Insert register copies if the register is - /// not in the required register class. - void AddRegisterOperand(MachineInstr *MI, SDValue Op, - unsigned IIOpNum, const TargetInstrDesc *II, - DenseMap<SDValue, unsigned> &VRBaseMap); - - /// EmitCopyFromReg - Generate machine code for an CopyFromReg node or an - /// implicit physical register output. - void EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, - bool IsCloned, unsigned SrcReg, - DenseMap<SDValue, unsigned> &VRBaseMap); - - void CreateVirtualRegisters(SDNode *Node, MachineInstr *MI, - const TargetInstrDesc &II, bool IsClone, - bool IsCloned, - DenseMap<SDValue, unsigned> &VRBaseMap); - /// BuildSchedUnits, AddSchedEdges - Helper functions for BuildSchedGraph. void BuildSchedUnits(); void AddSchedEdges(); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index c8f4b52..542bf64 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -13,6 +13,7 @@ #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/Constants.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/Function.h" #include "llvm/GlobalAlias.h" #include "llvm/GlobalVariable.h" #include "llvm/Intrinsics.h" @@ -31,6 +32,7 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" @@ -46,14 +48,14 @@ using namespace llvm; /// makeVTList - Return an instance of the SDVTList struct initialized with the /// specified members. 
-static SDVTList makeVTList(const MVT *VTs, unsigned NumVTs) { +static SDVTList makeVTList(const EVT *VTs, unsigned NumVTs) { SDVTList Res = {VTs, NumVTs}; return Res; } -static const fltSemantics *MVTToAPFloatSemantics(MVT VT) { - switch (VT.getSimpleVT()) { - default: assert(0 && "Unknown FP format"); +static const fltSemantics *EVTToAPFloatSemantics(EVT VT) { + switch (VT.getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unknown FP format"); case MVT::f32: return &APFloat::IEEEsingle; case MVT::f64: return &APFloat::IEEEdouble; case MVT::f80: return &APFloat::x87DoubleExtended; @@ -76,7 +78,7 @@ bool ConstantFPSDNode::isExactlyValue(const APFloat& V) const { return getValueAPF().bitwiseIsEqual(V); } -bool ConstantFPSDNode::isValueValidForType(MVT VT, +bool ConstantFPSDNode::isValueValidForType(EVT VT, const APFloat& Val) { assert(VT.isFloatingPoint() && "Can only convert between FP types"); @@ -88,7 +90,7 @@ bool ConstantFPSDNode::isValueValidForType(MVT VT, // convert modifies in place, so make a copy. APFloat Val2 = APFloat(Val); bool losesInfo; - (void) Val2.convert(*MVTToAPFloatSemantics(VT), APFloat::rmNearestTiesToEven, + (void) Val2.convert(*EVTToAPFloatSemantics(VT), APFloat::rmNearestTiesToEven, &losesInfo); return !losesInfo; } @@ -243,7 +245,7 @@ ISD::CondCode ISD::getSetCCInverse(ISD::CondCode Op, bool isInteger) { /// if the operation does not depend on the sign of the input (setne and seteq). 
static int isSignedOp(ISD::CondCode Opcode) { switch (Opcode) { - default: assert(0 && "Illegal integer setcc operation!"); + default: llvm_unreachable("Illegal integer setcc operation!"); case ISD::SETEQ: case ISD::SETNE: return 0; case ISD::SETLT: @@ -363,11 +365,8 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { switch (N->getOpcode()) { case ISD::TargetExternalSymbol: case ISD::ExternalSymbol: - assert(0 && "Should only be used on nodes with operands"); + llvm_unreachable("Should only be used on nodes with operands"); default: break; // Normal nodes don't need extra info. - case ISD::ARG_FLAGS: - ID.AddInteger(cast<ARG_FLAGSSDNode>(N)->getArgFlags().getRawBits()); - break; case ISD::TargetConstant: case ISD::Constant: ID.AddPointer(cast<ConstantSDNode>(N)->getConstantIntValue()); @@ -403,11 +402,6 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { case ISD::SRCVALUE: ID.AddPointer(cast<SrcValueSDNode>(N)->getValue()); break; - case ISD::MEMOPERAND: { - const MachineMemOperand &MO = cast<MemOperandSDNode>(N)->MO; - MO.Profile(ID); - break; - } case ISD::FrameIndex: case ISD::TargetFrameIndex: ID.AddInteger(cast<FrameIndexSDNode>(N)->getIndex()); @@ -429,12 +423,6 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { ID.AddInteger(CP->getTargetFlags()); break; } - case ISD::CALL: { - const CallSDNode *Call = cast<CallSDNode>(N); - ID.AddInteger(Call->getCallingConv()); - ID.AddInteger(Call->isVarArg()); - break; - } case ISD::LOAD: { const LoadSDNode *LD = cast<LoadSDNode>(N); ID.AddInteger(LD->getMemoryVT().getRawBits()); @@ -466,7 +454,7 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { } case ISD::VECTOR_SHUFFLE: { const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N); - for (unsigned i = 0, e = N->getValueType(0).getVectorNumElements(); + for (unsigned i = 0, e = N->getValueType(0).getVectorNumElements(); i != e; ++i) ID.AddInteger(SVN->getMaskElt(i)); break; @@ 
-488,20 +476,18 @@ static void AddNodeIDNode(FoldingSetNodeID &ID, const SDNode *N) { } /// encodeMemSDNodeFlags - Generic routine for computing a value for use in -/// the CSE map that carries alignment, volatility, indexing mode, and +/// the CSE map that carries volatility, indexing mode, and /// extension/truncation information. /// static inline unsigned -encodeMemSDNodeFlags(int ConvType, ISD::MemIndexedMode AM, - bool isVolatile, unsigned Alignment) { +encodeMemSDNodeFlags(int ConvType, ISD::MemIndexedMode AM, bool isVolatile) { assert((ConvType & 3) == ConvType && "ConvType may not require more than 2 bits!"); assert((AM & 7) == AM && "AM may not require more than 3 bits!"); return ConvType | (AM << 2) | - (isVolatile << 5) | - ((Log2_32(Alignment) + 1) << 6); + (isVolatile << 5); } //===----------------------------------------------------------------------===// @@ -519,7 +505,6 @@ static bool doNotCSE(SDNode *N) { case ISD::DBG_LABEL: case ISD::DBG_STOPPOINT: case ISD::EH_LABEL: - case ISD::DECLARE: return true; // Never CSE these nodes. } @@ -626,7 +611,7 @@ bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) { bool Erased = false; switch (N->getOpcode()) { case ISD::EntryToken: - assert(0 && "EntryToken should not be in CSEMaps!"); + llvm_unreachable("EntryToken should not be in CSEMaps!"); return false; case ISD::HANDLENODE: return false; // noop. 
case ISD::CONDCODE: @@ -646,12 +631,12 @@ bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) { break; } case ISD::VALUETYPE: { - MVT VT = cast<VTSDNode>(N)->getVT(); + EVT VT = cast<VTSDNode>(N)->getVT(); if (VT.isExtended()) { Erased = ExtendedValueTypeNodes.erase(VT); } else { - Erased = ValueTypeNodes[VT.getSimpleVT()] != 0; - ValueTypeNodes[VT.getSimpleVT()] = 0; + Erased = ValueTypeNodes[VT.getSimpleVT().SimpleTy] != 0; + ValueTypeNodes[VT.getSimpleVT().SimpleTy] = 0; } break; } @@ -667,8 +652,8 @@ bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) { if (!Erased && N->getValueType(N->getNumValues()-1) != MVT::Flag && !N->isMachineOpcode() && !doNotCSE(N)) { N->dump(this); - cerr << "\n"; - assert(0 && "Node is not in map!"); + errs() << "\n"; + llvm_unreachable("Node is not in map!"); } #endif return Erased; @@ -762,7 +747,7 @@ void SelectionDAG::VerifyNode(SDNode *N) { default: break; case ISD::BUILD_PAIR: { - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); assert(N->getNumValues() == 1 && "Too many results!"); assert(!VT.isVector() && (VT.isInteger() || VT.isFloatingPoint()) && "Wrong return type!"); @@ -780,7 +765,7 @@ void SelectionDAG::VerifyNode(SDNode *N) { assert(N->getValueType(0).isVector() && "Wrong return type!"); assert(N->getNumOperands() == N->getValueType(0).getVectorNumElements() && "Wrong number of operands!"); - MVT EltVT = N->getValueType(0).getVectorElementType(); + EVT EltVT = N->getValueType(0).getVectorElementType(); for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ++I) assert((I->getValueType() == EltVT || (EltVT.isInteger() && I->getValueType().isInteger() && @@ -791,13 +776,13 @@ void SelectionDAG::VerifyNode(SDNode *N) { } } -/// getMVTAlignment - Compute the default alignment value for the +/// getEVTAlignment - Compute the default alignment value for the /// given type. 
/// -unsigned SelectionDAG::getMVTAlignment(MVT VT) const { +unsigned SelectionDAG::getEVTAlignment(EVT VT) const { const Type *Ty = VT == MVT::iPTR ? - PointerType::get(Type::Int8Ty, 0) : - VT.getTypeForMVT(); + PointerType::get(Type::getInt8Ty(*getContext()), 0) : + VT.getTypeForEVT(*getContext()); return TLI.getTargetData()->getABITypeAlignment(Ty); } @@ -815,6 +800,7 @@ void SelectionDAG::init(MachineFunction &mf, MachineModuleInfo *mmi, MF = &mf; MMI = mmi; DW = dw; + Context = &mf.getFunction()->getContext(); } SelectionDAG::~SelectionDAG() { @@ -846,7 +832,19 @@ void SelectionDAG::clear() { Root = getEntryNode(); } -SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, DebugLoc DL, MVT VT) { +SDValue SelectionDAG::getSExtOrTrunc(SDValue Op, DebugLoc DL, EVT VT) { + return VT.bitsGT(Op.getValueType()) ? + getNode(ISD::SIGN_EXTEND, DL, VT, Op) : + getNode(ISD::TRUNCATE, DL, VT, Op); +} + +SDValue SelectionDAG::getZExtOrTrunc(SDValue Op, DebugLoc DL, EVT VT) { + return VT.bitsGT(Op.getValueType()) ? + getNode(ISD::ZERO_EXTEND, DL, VT, Op) : + getNode(ISD::TRUNCATE, DL, VT, Op); +} + +SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, DebugLoc DL, EVT VT) { if (Op.getValueType() == VT) return Op; APInt Imm = APInt::getLowBitsSet(Op.getValueSizeInBits(), VT.getSizeInBits()); @@ -856,29 +854,29 @@ SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, DebugLoc DL, MVT VT) { /// getNOT - Create a bitwise NOT operation as (XOR Val, -1). /// -SDValue SelectionDAG::getNOT(DebugLoc DL, SDValue Val, MVT VT) { - MVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT; +SDValue SelectionDAG::getNOT(DebugLoc DL, SDValue Val, EVT VT) { + EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT; SDValue NegOne = getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT); return getNode(ISD::XOR, DL, VT, Val, NegOne); } -SDValue SelectionDAG::getConstant(uint64_t Val, MVT VT, bool isT) { - MVT EltVT = VT.isVector() ? 
VT.getVectorElementType() : VT; +SDValue SelectionDAG::getConstant(uint64_t Val, EVT VT, bool isT) { + EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT; assert((EltVT.getSizeInBits() >= 64 || (uint64_t)((int64_t)Val >> EltVT.getSizeInBits()) + 1 < 2) && "getConstant with a uint64_t value that doesn't fit in the type!"); return getConstant(APInt(EltVT.getSizeInBits(), Val), VT, isT); } -SDValue SelectionDAG::getConstant(const APInt &Val, MVT VT, bool isT) { - return getConstant(*ConstantInt::get(Val), VT, isT); +SDValue SelectionDAG::getConstant(const APInt &Val, EVT VT, bool isT) { + return getConstant(*ConstantInt::get(*Context, Val), VT, isT); } -SDValue SelectionDAG::getConstant(const ConstantInt &Val, MVT VT, bool isT) { +SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT) { assert(VT.isInteger() && "Cannot create FP integer constant!"); - MVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT; + EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT; assert(Val.getBitWidth() == EltVT.getSizeInBits() && "APInt size does not match type size!"); @@ -913,14 +911,14 @@ SDValue SelectionDAG::getIntPtrConstant(uint64_t Val, bool isTarget) { } -SDValue SelectionDAG::getConstantFP(const APFloat& V, MVT VT, bool isTarget) { - return getConstantFP(*ConstantFP::get(V), VT, isTarget); +SDValue SelectionDAG::getConstantFP(const APFloat& V, EVT VT, bool isTarget) { + return getConstantFP(*ConstantFP::get(*getContext(), V), VT, isTarget); } -SDValue SelectionDAG::getConstantFP(const ConstantFP& V, MVT VT, bool isTarget){ +SDValue SelectionDAG::getConstantFP(const ConstantFP& V, EVT VT, bool isTarget){ assert(VT.isFloatingPoint() && "Cannot create integer FP constant!"); - MVT EltVT = + EVT EltVT = VT.isVector() ? 
VT.getVectorElementType() : VT; // Do the map lookup using the actual bit pattern for the floating point @@ -953,8 +951,8 @@ SDValue SelectionDAG::getConstantFP(const ConstantFP& V, MVT VT, bool isTarget){ return Result; } -SDValue SelectionDAG::getConstantFP(double Val, MVT VT, bool isTarget) { - MVT EltVT = +SDValue SelectionDAG::getConstantFP(double Val, EVT VT, bool isTarget) { + EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT; if (EltVT==MVT::f32) return getConstantFP(APFloat((float)Val), VT, isTarget); @@ -963,14 +961,15 @@ SDValue SelectionDAG::getConstantFP(double Val, MVT VT, bool isTarget) { } SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, - MVT VT, int64_t Offset, + EVT VT, int64_t Offset, bool isTargetGA, unsigned char TargetFlags) { assert((TargetFlags == 0 || isTargetGA) && "Cannot set target flags on target-independent globals"); - + // Truncate (with sign-extension) the offset value to the pointer size. - unsigned BitWidth = TLI.getPointerTy().getSizeInBits(); + EVT PTy = TLI.getPointerTy(); + unsigned BitWidth = PTy.getSizeInBits(); if (BitWidth < 64) Offset = (Offset << (64 - BitWidth) >> (64 - BitWidth)); @@ -1002,7 +1001,7 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, return SDValue(N, 0); } -SDValue SelectionDAG::getFrameIndex(int FI, MVT VT, bool isTarget) { +SDValue SelectionDAG::getFrameIndex(int FI, EVT VT, bool isTarget) { unsigned Opc = isTarget ? 
ISD::TargetFrameIndex : ISD::FrameIndex; FoldingSetNodeID ID; AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0); @@ -1017,7 +1016,7 @@ SDValue SelectionDAG::getFrameIndex(int FI, MVT VT, bool isTarget) { return SDValue(N, 0); } -SDValue SelectionDAG::getJumpTable(int JTI, MVT VT, bool isTarget, +SDValue SelectionDAG::getJumpTable(int JTI, EVT VT, bool isTarget, unsigned char TargetFlags) { assert((TargetFlags == 0 || isTarget) && "Cannot set target flags on target-independent jump tables"); @@ -1036,9 +1035,9 @@ SDValue SelectionDAG::getJumpTable(int JTI, MVT VT, bool isTarget, return SDValue(N, 0); } -SDValue SelectionDAG::getConstantPool(Constant *C, MVT VT, +SDValue SelectionDAG::getConstantPool(Constant *C, EVT VT, unsigned Alignment, int Offset, - bool isTarget, + bool isTarget, unsigned char TargetFlags) { assert((TargetFlags == 0 || isTarget) && "Cannot set target flags on target-independent globals"); @@ -1062,7 +1061,7 @@ SDValue SelectionDAG::getConstantPool(Constant *C, MVT VT, } -SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, MVT VT, +SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT, unsigned Alignment, int Offset, bool isTarget, unsigned char TargetFlags) { @@ -1101,26 +1100,13 @@ SDValue SelectionDAG::getBasicBlock(MachineBasicBlock *MBB) { return SDValue(N, 0); } -SDValue SelectionDAG::getArgFlags(ISD::ArgFlagsTy Flags) { - FoldingSetNodeID ID; - AddNodeIDNode(ID, ISD::ARG_FLAGS, getVTList(MVT::Other), 0, 0); - ID.AddInteger(Flags.getRawBits()); - void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) - return SDValue(E, 0); - SDNode *N = NodeAllocator.Allocate<ARG_FLAGSSDNode>(); - new (N) ARG_FLAGSSDNode(Flags); - CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); - return SDValue(N, 0); -} - -SDValue SelectionDAG::getValueType(MVT VT) { - if (VT.isSimple() && (unsigned)VT.getSimpleVT() >= ValueTypeNodes.size()) - ValueTypeNodes.resize(VT.getSimpleVT()+1); +SDValue SelectionDAG::getValueType(EVT 
VT) { + if (VT.isSimple() && (unsigned)VT.getSimpleVT().SimpleTy >= + ValueTypeNodes.size()) + ValueTypeNodes.resize(VT.getSimpleVT().SimpleTy+1); SDNode *&N = VT.isExtended() ? - ExtendedValueTypeNodes[VT] : ValueTypeNodes[VT.getSimpleVT()]; + ExtendedValueTypeNodes[VT] : ValueTypeNodes[VT.getSimpleVT().SimpleTy]; if (N) return SDValue(N, 0); N = NodeAllocator.Allocate<VTSDNode>(); @@ -1129,7 +1115,7 @@ SDValue SelectionDAG::getValueType(MVT VT) { return SDValue(N, 0); } -SDValue SelectionDAG::getExternalSymbol(const char *Sym, MVT VT) { +SDValue SelectionDAG::getExternalSymbol(const char *Sym, EVT VT) { SDNode *&N = ExternalSymbols[Sym]; if (N) return SDValue(N, 0); N = NodeAllocator.Allocate<ExternalSymbolSDNode>(); @@ -1138,7 +1124,7 @@ SDValue SelectionDAG::getExternalSymbol(const char *Sym, MVT VT) { return SDValue(N, 0); } -SDValue SelectionDAG::getTargetExternalSymbol(const char *Sym, MVT VT, +SDValue SelectionDAG::getTargetExternalSymbol(const char *Sym, EVT VT, unsigned char TargetFlags) { SDNode *&N = TargetExternalSymbols[std::pair<std::string,unsigned char>(Sym, @@ -1177,19 +1163,19 @@ static void commuteShuffle(SDValue &N1, SDValue &N2, SmallVectorImpl<int> &M) { } } -SDValue SelectionDAG::getVectorShuffle(MVT VT, DebugLoc dl, SDValue N1, +SDValue SelectionDAG::getVectorShuffle(EVT VT, DebugLoc dl, SDValue N1, SDValue N2, const int *Mask) { assert(N1.getValueType() == N2.getValueType() && "Invalid VECTOR_SHUFFLE"); - assert(VT.isVector() && N1.getValueType().isVector() && + assert(VT.isVector() && N1.getValueType().isVector() && "Vector Shuffle VTs must be a vectors"); assert(VT.getVectorElementType() == N1.getValueType().getVectorElementType() && "Vector Shuffle VTs must have same element type"); // Canonicalize shuffle undef, undef -> undef if (N1.getOpcode() == ISD::UNDEF && N2.getOpcode() == ISD::UNDEF) - return N1; + return getUNDEF(VT); - // Validate that all indices in Mask are within the range of the elements + // Validate that all indices in 
Mask are within the range of the elements // input to the shuffle. unsigned NElts = VT.getVectorNumElements(); SmallVector<int, 8> MaskVec; @@ -1197,18 +1183,18 @@ SDValue SelectionDAG::getVectorShuffle(MVT VT, DebugLoc dl, SDValue N1, assert(Mask[i] < (int)(NElts * 2) && "Index out of range"); MaskVec.push_back(Mask[i]); } - + // Canonicalize shuffle v, v -> v, undef if (N1 == N2) { N2 = getUNDEF(VT); for (unsigned i = 0; i != NElts; ++i) if (MaskVec[i] >= (int)NElts) MaskVec[i] -= NElts; } - + // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask. if (N1.getOpcode() == ISD::UNDEF) commuteShuffle(N1, N2, MaskVec); - + // Canonicalize all index into lhs, -> shuffle lhs, undef // Canonicalize all index into rhs, -> shuffle rhs, undef bool AllLHS = true, AllRHS = true; @@ -1231,7 +1217,7 @@ SDValue SelectionDAG::getVectorShuffle(MVT VT, DebugLoc dl, SDValue N1, N1 = getUNDEF(VT); commuteShuffle(N1, N2, MaskVec); } - + // If Identity shuffle, or all shuffle in to undef, return that node. bool AllUndef = true; bool Identity = true; @@ -1239,7 +1225,7 @@ SDValue SelectionDAG::getVectorShuffle(MVT VT, DebugLoc dl, SDValue N1, if (MaskVec[i] >= 0 && MaskVec[i] != (int)i) Identity = false; if (MaskVec[i] >= 0) AllUndef = false; } - if (Identity) + if (Identity && NElts == N1.getValueType().getVectorNumElements()) return N1; if (AllUndef) return getUNDEF(VT); @@ -1249,17 +1235,17 @@ SDValue SelectionDAG::getVectorShuffle(MVT VT, DebugLoc dl, SDValue N1, AddNodeIDNode(ID, ISD::VECTOR_SHUFFLE, getVTList(VT), Ops, 2); for (unsigned i = 0; i != NElts; ++i) ID.AddInteger(MaskVec[i]); - + void* IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - + // Allocate the mask array for the node out of the BumpPtrAllocator, since // SDNode doesn't have access to it. This memory will be "leaked" when // the node is deallocated, but recovered when the NodeAllocator is released. 
int *MaskAlloc = OperandAllocator.Allocate<int>(NElts); memcpy(MaskAlloc, &MaskVec[0], NElts * sizeof(int)); - + ShuffleVectorSDNode *N = NodeAllocator.Allocate<ShuffleVectorSDNode>(); new (N) ShuffleVectorSDNode(VT, dl, N1, N2, MaskAlloc); CSEMap.InsertNode(N, IP); @@ -1267,7 +1253,7 @@ SDValue SelectionDAG::getVectorShuffle(MVT VT, DebugLoc dl, SDValue N1, return SDValue(N, 0); } -SDValue SelectionDAG::getConvertRndSat(MVT VT, DebugLoc dl, +SDValue SelectionDAG::getConvertRndSat(EVT VT, DebugLoc dl, SDValue Val, SDValue DTy, SDValue STy, SDValue Rnd, SDValue Sat, ISD::CvtCode Code) { @@ -1289,7 +1275,7 @@ SDValue SelectionDAG::getConvertRndSat(MVT VT, DebugLoc dl, return SDValue(N, 0); } -SDValue SelectionDAG::getRegister(unsigned RegNo, MVT VT) { +SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) { FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::Register, getVTList(VT), 0, 0); ID.AddInteger(RegNo); @@ -1305,7 +1291,7 @@ SDValue SelectionDAG::getRegister(unsigned RegNo, MVT VT) { SDValue SelectionDAG::getDbgStopPoint(DebugLoc DL, SDValue Root, unsigned Line, unsigned Col, - Value *CU) { + MDNode *CU) { SDNode *N = NodeAllocator.Allocate<DbgStopPointSDNode>(); new (N) DbgStopPointSDNode(Root, Line, Col, CU); N->setDebugLoc(DL); @@ -1349,32 +1335,10 @@ SDValue SelectionDAG::getSrcValue(const Value *V) { return SDValue(N, 0); } -SDValue SelectionDAG::getMemOperand(const MachineMemOperand &MO) { -#ifndef NDEBUG - const Value *v = MO.getValue(); - assert((!v || isa<PointerType>(v->getType())) && - "SrcValue is not a pointer?"); -#endif - - FoldingSetNodeID ID; - AddNodeIDNode(ID, ISD::MEMOPERAND, getVTList(MVT::Other), 0, 0); - MO.Profile(ID); - - void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) - return SDValue(E, 0); - - SDNode *N = NodeAllocator.Allocate<MemOperandSDNode>(); - new (N) MemOperandSDNode(MO); - CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); - return SDValue(N, 0); -} - /// getShiftAmountOperand - Return the specified value 
casted to /// the target's desired shift amount type. SDValue SelectionDAG::getShiftAmountOperand(SDValue Op) { - MVT OpTy = Op.getValueType(); + EVT OpTy = Op.getValueType(); MVT ShTy = TLI.getShiftAmountTy(); if (OpTy == ShTy || OpTy.isVector()) return Op; @@ -1384,10 +1348,10 @@ SDValue SelectionDAG::getShiftAmountOperand(SDValue Op) { /// CreateStackTemporary - Create a stack temporary, suitable for holding the /// specified value type. -SDValue SelectionDAG::CreateStackTemporary(MVT VT, unsigned minAlign) { +SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) { MachineFrameInfo *FrameInfo = getMachineFunction().getFrameInfo(); - unsigned ByteSize = VT.getStoreSizeInBits()/8; - const Type *Ty = VT.getTypeForMVT(); + unsigned ByteSize = VT.getStoreSize(); + const Type *Ty = VT.getTypeForEVT(*getContext()); unsigned StackAlign = std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty), minAlign); @@ -1397,11 +1361,11 @@ SDValue SelectionDAG::CreateStackTemporary(MVT VT, unsigned minAlign) { /// CreateStackTemporary - Create a stack temporary suitable for holding /// either of the specified value types. 
-SDValue SelectionDAG::CreateStackTemporary(MVT VT1, MVT VT2) { +SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) { unsigned Bytes = std::max(VT1.getStoreSizeInBits(), VT2.getStoreSizeInBits())/8; - const Type *Ty1 = VT1.getTypeForMVT(); - const Type *Ty2 = VT2.getTypeForMVT(); + const Type *Ty1 = VT1.getTypeForEVT(*getContext()); + const Type *Ty2 = VT2.getTypeForEVT(*getContext()); const TargetData *TD = TLI.getTargetData(); unsigned Align = std::max(TD->getPrefTypeAlignment(Ty1), TD->getPrefTypeAlignment(Ty2)); @@ -1411,7 +1375,7 @@ SDValue SelectionDAG::CreateStackTemporary(MVT VT1, MVT VT2) { return getFrameIndex(FrameIdx, TLI.getPointerTy()); } -SDValue SelectionDAG::FoldSetCC(MVT VT, SDValue N1, +SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2, ISD::CondCode Cond, DebugLoc dl) { // These setcc operations always fold. switch (Cond) { @@ -1441,7 +1405,7 @@ SDValue SelectionDAG::FoldSetCC(MVT VT, SDValue N1, const APInt &C1 = N1C->getAPIntValue(); switch (Cond) { - default: assert(0 && "Unknown integer setcc!"); + default: llvm_unreachable("Unknown integer setcc!"); case ISD::SETEQ: return getConstant(C1 == C2, VT); case ISD::SETNE: return getConstant(C1 != C2, VT); case ISD::SETULT: return getConstant(C1.ult(C2), VT); @@ -1516,6 +1480,10 @@ SDValue SelectionDAG::FoldSetCC(MVT VT, SDValue N1, /// SignBitIsZero - Return true if the sign bit of Op is known to be zero. We /// use this predicate to simplify operations downstream. bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const { + // This predicate is not safe for vector operations. 
+ if (Op.getValueType().isVector()) + return false; + unsigned BitWidth = Op.getValueSizeInBits(); return MaskedValueIsZero(Op, APInt::getSignBit(BitWidth), Depth); } @@ -1743,7 +1711,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, } return; case ISD::SIGN_EXTEND_INREG: { - MVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); + EVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); unsigned EBits = EVT.getSizeInBits(); // Sign extension. Compute the demanded bits in the result that are not @@ -1788,14 +1756,14 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, case ISD::LOAD: { if (ISD::isZEXTLoad(Op.getNode())) { LoadSDNode *LD = cast<LoadSDNode>(Op); - MVT VT = LD->getMemoryVT(); + EVT VT = LD->getMemoryVT(); unsigned MemBits = VT.getSizeInBits(); KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits) & Mask; } return; } case ISD::ZERO_EXTEND: { - MVT InVT = Op.getOperand(0).getValueType(); + EVT InVT = Op.getOperand(0).getValueType(); unsigned InBits = InVT.getSizeInBits(); APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits) & Mask; APInt InMask = Mask; @@ -1809,7 +1777,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, return; } case ISD::SIGN_EXTEND: { - MVT InVT = Op.getOperand(0).getValueType(); + EVT InVT = Op.getOperand(0).getValueType(); unsigned InBits = InVT.getSizeInBits(); APInt InSignBit = APInt::getSignBit(InBits); APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits) & Mask; @@ -1850,7 +1818,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, return; } case ISD::ANY_EXTEND: { - MVT InVT = Op.getOperand(0).getValueType(); + EVT InVT = Op.getOperand(0).getValueType(); unsigned InBits = InVT.getSizeInBits(); APInt InMask = Mask; InMask.trunc(InBits); @@ -1862,7 +1830,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, return; } case ISD::TRUNCATE: { - MVT InVT = Op.getOperand(0).getValueType(); + EVT InVT = 
Op.getOperand(0).getValueType(); unsigned InBits = InVT.getSizeInBits(); APInt InMask = Mask; InMask.zext(InBits); @@ -1875,7 +1843,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, break; } case ISD::AssertZext: { - MVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT(); + EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT(); APInt InMask = APInt::getLowBitsSet(BitWidth, VT.getSizeInBits()); ComputeMaskedBits(Op.getOperand(0), Mask & InMask, KnownZero, KnownOne, Depth+1); @@ -1981,7 +1949,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, case ISD::INTRINSIC_WO_CHAIN: case ISD::INTRINSIC_W_CHAIN: case ISD::INTRINSIC_VOID: - TLI.computeMaskedBitsForTargetNode(Op, Mask, KnownZero, KnownOne, *this); + TLI.computeMaskedBitsForTargetNode(Op, Mask, KnownZero, KnownOne, *this, + Depth); } return; } @@ -1993,7 +1962,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, /// information. For example, immediately after an "SRA X, 2", we know that /// the top 3 bits are all equal to each other, so we return 3. unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ - MVT VT = Op.getValueType(); + EVT VT = Op.getValueType(); assert(VT.isInteger() && "Invalid VT!"); unsigned VTBits = VT.getSizeInBits(); unsigned Tmp, Tmp2; @@ -2212,6 +2181,19 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ return std::max(FirstAnswer, std::min(VTBits, Mask.countLeadingZeros())); } +bool SelectionDAG::isKnownNeverNaN(SDValue Op) const { + // If we're told that NaNs won't happen, assume they won't. + if (FiniteOnlyFPMath()) + return true; + + // If the value is a constant, we can obviously see if it is a NaN or not. + if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op)) + return !C->getValueAPF().isNaN(); + + // TODO: Recognize more cases here. 
+ + return false; +} bool SelectionDAG::isVerifiedDebugInfoDesc(SDValue Op) const { GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op); @@ -2228,7 +2210,7 @@ bool SelectionDAG::isVerifiedDebugInfoDesc(SDValue Op) const { /// element of the result of the vector shuffle. SDValue SelectionDAG::getShuffleScalarElt(const ShuffleVectorSDNode *N, unsigned i) { - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); DebugLoc dl = N->getDebugLoc(); if (N->getMaskElt(i) < 0) return getUNDEF(VT.getVectorElementType()); @@ -2239,7 +2221,7 @@ SDValue SelectionDAG::getShuffleScalarElt(const ShuffleVectorSDNode *N, if (V.getOpcode() == ISD::BIT_CONVERT) { V = V.getOperand(0); - MVT VVT = V.getValueType(); + EVT VVT = V.getValueType(); if (!VVT.isVector() || VVT.getVectorNumElements() != (unsigned)NumElems) return SDValue(); } @@ -2256,7 +2238,7 @@ SDValue SelectionDAG::getShuffleScalarElt(const ShuffleVectorSDNode *N, /// getNode - Gets or creates the specified node. /// -SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT) { +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT) { FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, getVTList(VT), 0, 0); void *IP = 0; @@ -2274,7 +2256,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT) { } SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, - MVT VT, SDValue Operand) { + EVT VT, SDValue Operand) { // Constant fold unary operations with an integer constant operand. if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Operand.getNode())) { const APInt &Val = C->getAPIntValue(); @@ -2332,7 +2314,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, bool ignored; // This can return overflow, underflow, or inexact; we don't care. // FIXME need to be more flexible about rounding mode. 
- (void)V.convert(*MVTToAPFloatSemantics(VT), + (void)V.convert(*EVTToAPFloatSemantics(VT), APFloat::rmNearestTiesToEven, &ignored); return getConstantFP(V, VT); } @@ -2366,7 +2348,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, case ISD::MERGE_VALUES: case ISD::CONCAT_VECTORS: return Operand; // Factor, merge or concat of one node? No need. - case ISD::FP_ROUND: assert(0 && "Invalid method to make FP_ROUND node"); + case ISD::FP_ROUND: llvm_unreachable("Invalid method to make FP_ROUND node"); case ISD::FP_EXTEND: assert(VT.isFloatingPoint() && Operand.getValueType().isFloatingPoint() && "Invalid FP cast!"); @@ -2487,7 +2469,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, } SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, - MVT VT, + EVT VT, ConstantSDNode *Cst1, ConstantSDNode *Cst2) { const APInt &C1 = Cst1->getAPIntValue(), &C2 = Cst2->getAPIntValue(); @@ -2522,7 +2504,7 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, return SDValue(); } -SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT, +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, SDValue N1, SDValue N2) { ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode()); @@ -2624,7 +2606,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT, return N1; break; case ISD::FP_ROUND_INREG: { - MVT EVT = cast<VTSDNode>(N2)->getVT(); + EVT EVT = cast<VTSDNode>(N2)->getVT(); assert(VT == N1.getValueType() && "Not an inreg round!"); assert(VT.isFloatingPoint() && EVT.isFloatingPoint() && "Cannot FP_ROUND_INREG integer types"); @@ -2641,7 +2623,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT, break; case ISD::AssertSext: case ISD::AssertZext: { - MVT EVT = cast<VTSDNode>(N2)->getVT(); + EVT EVT = cast<VTSDNode>(N2)->getVT(); assert(VT == N1.getValueType() && "Not an inreg extend!"); assert(VT.isInteger() && 
EVT.isInteger() && "Cannot *_EXTEND_INREG FP types"); @@ -2650,7 +2632,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT, break; } case ISD::SIGN_EXTEND_INREG: { - MVT EVT = cast<VTSDNode>(N2)->getVT(); + EVT EVT = cast<VTSDNode>(N2)->getVT(); assert(VT == N1.getValueType() && "Not an inreg extend!"); assert(VT.isInteger() && EVT.isInteger() && "Cannot *_EXTEND_INREG FP types"); @@ -2688,13 +2670,16 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT, // expanding large vector constants. if (N2C && N1.getOpcode() == ISD::BUILD_VECTOR) { SDValue Elt = N1.getOperand(N2C->getZExtValue()); - if (Elt.getValueType() != VT) { + EVT VEltTy = N1.getValueType().getVectorElementType(); + if (Elt.getValueType() != VEltTy) { // If the vector element type is not legal, the BUILD_VECTOR operands // are promoted and implicitly truncated. Make that explicit here. - assert(VT.isInteger() && Elt.getValueType().isInteger() && - VT.bitsLE(Elt.getValueType()) && - "Bad type for BUILD_VECTOR operand"); - Elt = getNode(ISD::TRUNCATE, DL, VT, Elt); + Elt = getNode(ISD::TRUNCATE, DL, VEltTy, Elt); + } + if (VT != VEltTy) { + // If the vector element type is not legal, the EXTRACT_VECTOR_ELT + // result is implicitly extended. + Elt = getNode(ISD::ANY_EXTEND, DL, VT, Elt); } return Elt; } @@ -2895,7 +2880,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT, return SDValue(N, 0); } -SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT, +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, SDValue N1, SDValue N2, SDValue N3) { // Perform various simplifications. 
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); @@ -2938,7 +2923,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT, } break; case ISD::VECTOR_SHUFFLE: - assert(0 && "should use getVectorShuffle constructor!"); + llvm_unreachable("should use getVectorShuffle constructor!"); break; case ISD::BIT_CONVERT: // Fold bit_convert nodes from a type to themselves. @@ -2971,23 +2956,46 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT, return SDValue(N, 0); } -SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT, +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, SDValue N1, SDValue N2, SDValue N3, SDValue N4) { SDValue Ops[] = { N1, N2, N3, N4 }; return getNode(Opcode, DL, VT, Ops, 4); } -SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT, +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, SDValue N1, SDValue N2, SDValue N3, SDValue N4, SDValue N5) { SDValue Ops[] = { N1, N2, N3, N4, N5 }; return getNode(Opcode, DL, VT, Ops, 5); } +/// getStackArgumentTokenFactor - Compute a TokenFactor to force all +/// the incoming stack arguments to be loaded from the stack. +SDValue SelectionDAG::getStackArgumentTokenFactor(SDValue Chain) { + SmallVector<SDValue, 8> ArgChains; + + // Include the original chain at the beginning of the list. When this is + // used by target LowerCall hooks, this helps legalize find the + // CALLSEQ_BEGIN node. + ArgChains.push_back(Chain); + + // Add a chain value for each stack argument. + for (SDNode::use_iterator U = getEntryNode().getNode()->use_begin(), + UE = getEntryNode().getNode()->use_end(); U != UE; ++U) + if (LoadSDNode *L = dyn_cast<LoadSDNode>(*U)) + if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr())) + if (FI->getIndex() < 0) + ArgChains.push_back(SDValue(L, 1)); + + // Build a tokenfactor for all the chains. 
+ return getNode(ISD::TokenFactor, Chain.getDebugLoc(), MVT::Other, + &ArgChains[0], ArgChains.size()); +} + /// getMemsetValue - Vectorized representation of the memset value /// operand. -static SDValue getMemsetValue(SDValue Value, MVT VT, SelectionDAG &DAG, +static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG, DebugLoc dl) { unsigned NumBits = VT.isVector() ? VT.getVectorElementType().getSizeInBits() : VT.getSizeInBits(); @@ -3021,9 +3029,9 @@ static SDValue getMemsetValue(SDValue Value, MVT VT, SelectionDAG &DAG, /// getMemsetStringVal - Similar to getMemsetValue. Except this is only /// used when a memcpy is turned into a memset when the source is a constant /// string ptr. -static SDValue getMemsetStringVal(MVT VT, DebugLoc dl, SelectionDAG &DAG, - const TargetLowering &TLI, - std::string &Str, unsigned Offset) { +static SDValue getMemsetStringVal(EVT VT, DebugLoc dl, SelectionDAG &DAG, + const TargetLowering &TLI, + std::string &Str, unsigned Offset) { // Handle vector with all elements zero. if (Str.empty()) { if (VT.isInteger()) @@ -3031,7 +3039,8 @@ static SDValue getMemsetStringVal(MVT VT, DebugLoc dl, SelectionDAG &DAG, unsigned NumElts = VT.getVectorNumElements(); MVT EltVT = (VT.getVectorElementType() == MVT::f32) ? 
MVT::i32 : MVT::i64; return DAG.getNode(ISD::BIT_CONVERT, dl, VT, - DAG.getConstant(0, MVT::getVectorVT(EltVT, NumElts))); + DAG.getConstant(0, + EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts))); } assert(!VT.isVector() && "Can't handle vector type here!"); @@ -3051,7 +3060,7 @@ static SDValue getMemsetStringVal(MVT VT, DebugLoc dl, SelectionDAG &DAG, /// static SDValue getMemBasePlusOffset(SDValue Base, unsigned Offset, SelectionDAG &DAG) { - MVT VT = Base.getValueType(); + EVT VT = Base.getValueType(); return DAG.getNode(ISD::ADD, Base.getDebugLoc(), VT, Base, DAG.getConstant(Offset, VT)); } @@ -3083,7 +3092,7 @@ static bool isMemSrcFromString(SDValue Src, std::string &Str) { /// to replace the memset / memcpy is below the threshold. It also returns the /// types of the sequence of memory ops to perform memset / memcpy. static -bool MeetsMaxMemopRequirement(std::vector<MVT> &MemOps, +bool MeetsMaxMemopRequirement(std::vector<EVT> &MemOps, SDValue Dst, SDValue Src, unsigned Limit, uint64_t Size, unsigned &Align, std::string &Str, bool &isSrcStr, @@ -3091,11 +3100,11 @@ bool MeetsMaxMemopRequirement(std::vector<MVT> &MemOps, const TargetLowering &TLI) { isSrcStr = isMemSrcFromString(Src, Str); bool isSrcConst = isa<ConstantSDNode>(Src); - bool AllowUnalign = TLI.allowsUnalignedMemoryAccesses(); - MVT VT = TLI.getOptimalMemOpType(Size, Align, isSrcConst, isSrcStr, DAG); + EVT VT = TLI.getOptimalMemOpType(Size, Align, isSrcConst, isSrcStr, DAG); + bool AllowUnalign = TLI.allowsUnalignedMemoryAccesses(VT); if (VT != MVT::iAny) { - unsigned NewAlign = (unsigned) - TLI.getTargetData()->getABITypeAlignment(VT.getTypeForMVT()); + const Type *Ty = VT.getTypeForEVT(*DAG.getContext()); + unsigned NewAlign = (unsigned) TLI.getTargetData()->getABITypeAlignment(Ty); // If source is a string constant, this will require an unaligned load. 
if (NewAlign > Align && (isSrcConst || AllowUnalign)) { if (Dst.getOpcode() != ISD::FrameIndex) { @@ -3120,7 +3129,7 @@ bool MeetsMaxMemopRequirement(std::vector<MVT> &MemOps, } if (VT == MVT::iAny) { - if (AllowUnalign) { + if (TLI.allowsUnalignedMemoryAccesses(MVT::i64)) { VT = MVT::i64; } else { switch (Align & 7) { @@ -3133,7 +3142,7 @@ bool MeetsMaxMemopRequirement(std::vector<MVT> &MemOps, MVT LVT = MVT::i64; while (!TLI.isTypeLegal(LVT)) - LVT = (MVT::SimpleValueType)(LVT.getSimpleVT() - 1); + LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1); assert(LVT.isInteger()); if (VT.bitsGT(LVT)) @@ -3148,12 +3157,12 @@ bool MeetsMaxMemopRequirement(std::vector<MVT> &MemOps, if (VT.isVector()) { VT = MVT::i64; while (!TLI.isTypeLegal(VT)) - VT = (MVT::SimpleValueType)(VT.getSimpleVT() - 1); + VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1); VTSize = VT.getSizeInBits() / 8; } else { // This can result in a type that is not legal on the target, e.g. // 1 or 2 bytes on PPC. - VT = (MVT::SimpleValueType)(VT.getSimpleVT() - 1); + VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1); VTSize >>= 1; } } @@ -3177,7 +3186,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, // Expand memcpy to a series of load and store ops if the size operand falls // below a certain threshold. 
- std::vector<MVT> MemOps; + std::vector<EVT> MemOps; uint64_t Limit = -1ULL; if (!AlwaysInline) Limit = TLI.getMaxStoresPerMemcpy(); @@ -3193,8 +3202,8 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, SmallVector<SDValue, 8> OutChains; unsigned NumMemOps = MemOps.size(); uint64_t SrcOff = 0, DstOff = 0; - for (unsigned i = 0; i < NumMemOps; i++) { - MVT VT = MemOps[i]; + for (unsigned i = 0; i != NumMemOps; ++i) { + EVT VT = MemOps[i]; unsigned VTSize = VT.getSizeInBits() / 8; SDValue Value, Store; @@ -3214,7 +3223,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, // thing to do is generate a LoadExt/StoreTrunc pair. These simplify // to Load/Store if NVT==VT. // FIXME does the case above also need this? - MVT NVT = TLI.getTypeToTransformTo(VT); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); assert(NVT.bitsGE(VT)); Value = DAG.getExtLoad(ISD::EXTLOAD, dl, NVT, Chain, getMemBasePlusOffset(Src, SrcOff, DAG), @@ -3242,7 +3251,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, // Expand memmove to a series of load and store ops if the size operand falls // below a certain threshold. 
- std::vector<MVT> MemOps; + std::vector<EVT> MemOps; uint64_t Limit = -1ULL; if (!AlwaysInline) Limit = TLI.getMaxStoresPerMemmove(); @@ -3260,7 +3269,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, SmallVector<SDValue, 8> OutChains; unsigned NumMemOps = MemOps.size(); for (unsigned i = 0; i < NumMemOps; i++) { - MVT VT = MemOps[i]; + EVT VT = MemOps[i]; unsigned VTSize = VT.getSizeInBits() / 8; SDValue Value, Store; @@ -3275,7 +3284,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, &LoadChains[0], LoadChains.size()); OutChains.clear(); for (unsigned i = 0; i < NumMemOps; i++) { - MVT VT = MemOps[i]; + EVT VT = MemOps[i]; unsigned VTSize = VT.getSizeInBits() / 8; SDValue Value, Store; @@ -3299,7 +3308,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl, // Expand memset to a series of load/store ops if the size operand // falls below a certain threshold. - std::vector<MVT> MemOps; + std::vector<EVT> MemOps; std::string Str; bool CopyFromStr; if (!MeetsMaxMemopRequirement(MemOps, Dst, Src, TLI.getMaxStoresPerMemset(), @@ -3311,7 +3320,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl, unsigned NumMemOps = MemOps.size(); for (unsigned i = 0; i < NumMemOps; i++) { - MVT VT = MemOps[i]; + EVT VT = MemOps[i]; unsigned VTSize = VT.getSizeInBits() / 8; SDValue Value = getMemsetValue(Src, VT, DAG, dl); SDValue Store = DAG.getStore(Chain, dl, Value, @@ -3368,15 +3377,18 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst, // Emit a library call. 
TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; - Entry.Ty = TLI.getTargetData()->getIntPtrType(); + Entry.Ty = TLI.getTargetData()->getIntPtrType(*getContext()); Entry.Node = Dst; Args.push_back(Entry); Entry.Node = Src; Args.push_back(Entry); Entry.Node = Size; Args.push_back(Entry); // FIXME: pass in DebugLoc std::pair<SDValue,SDValue> CallResult = - TLI.LowerCallTo(Chain, Type::VoidTy, - false, false, false, false, 0, CallingConv::C, false, - getExternalSymbol("memcpy", TLI.getPointerTy()), + TLI.LowerCallTo(Chain, Type::getVoidTy(*getContext()), + false, false, false, false, 0, + TLI.getLibcallCallingConv(RTLIB::MEMCPY), false, + /*isReturnValueUsed=*/false, + getExternalSymbol(TLI.getLibcallName(RTLIB::MEMCPY), + TLI.getPointerTy()), Args, *this, dl); return CallResult.second; } @@ -3414,15 +3426,18 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst, // Emit a library call. TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; - Entry.Ty = TLI.getTargetData()->getIntPtrType(); + Entry.Ty = TLI.getTargetData()->getIntPtrType(*getContext()); Entry.Node = Dst; Args.push_back(Entry); Entry.Node = Src; Args.push_back(Entry); Entry.Node = Size; Args.push_back(Entry); // FIXME: pass in DebugLoc std::pair<SDValue,SDValue> CallResult = - TLI.LowerCallTo(Chain, Type::VoidTy, - false, false, false, false, 0, CallingConv::C, false, - getExternalSymbol("memmove", TLI.getPointerTy()), + TLI.LowerCallTo(Chain, Type::getVoidTy(*getContext()), + false, false, false, false, 0, + TLI.getLibcallCallingConv(RTLIB::MEMMOVE), false, + /*isReturnValueUsed=*/false, + getExternalSymbol(TLI.getLibcallName(RTLIB::MEMMOVE), + TLI.getPointerTy()), Args, *this, dl); return CallResult.second; } @@ -3456,7 +3471,7 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst, return Result; // Emit a library call. 
- const Type *IntPtrTy = TLI.getTargetData()->getIntPtrType(); + const Type *IntPtrTy = TLI.getTargetData()->getIntPtrType(*getContext()); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; Entry.Node = Dst; Entry.Ty = IntPtrTy; @@ -3466,31 +3481,61 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst, Src = getNode(ISD::TRUNCATE, dl, MVT::i32, Src); else Src = getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src); - Entry.Node = Src; Entry.Ty = Type::Int32Ty; Entry.isSExt = true; + Entry.Node = Src; + Entry.Ty = Type::getInt32Ty(*getContext()); + Entry.isSExt = true; Args.push_back(Entry); - Entry.Node = Size; Entry.Ty = IntPtrTy; Entry.isSExt = false; + Entry.Node = Size; + Entry.Ty = IntPtrTy; + Entry.isSExt = false; Args.push_back(Entry); // FIXME: pass in DebugLoc std::pair<SDValue,SDValue> CallResult = - TLI.LowerCallTo(Chain, Type::VoidTy, - false, false, false, false, 0, CallingConv::C, false, - getExternalSymbol("memset", TLI.getPointerTy()), + TLI.LowerCallTo(Chain, Type::getVoidTy(*getContext()), + false, false, false, false, 0, + TLI.getLibcallCallingConv(RTLIB::MEMSET), false, + /*isReturnValueUsed=*/false, + getExternalSymbol(TLI.getLibcallName(RTLIB::MEMSET), + TLI.getPointerTy()), Args, *this, dl); return CallResult.second; } -SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, MVT MemVT, +SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Cmp, SDValue Swp, const Value* PtrVal, unsigned Alignment) { + if (Alignment == 0) // Ensure that codegen never sees alignment 0 + Alignment = getEVTAlignment(MemVT); + + // Check if the memory reference references a frame index + if (!PtrVal) + if (const FrameIndexSDNode *FI = + dyn_cast<const FrameIndexSDNode>(Ptr.getNode())) + PtrVal = PseudoSourceValue::getFixedStack(FI->getIndex()); + + MachineFunction &MF = getMachineFunction(); + unsigned Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore; + + // 
For now, atomics are considered to be volatile always. + Flags |= MachineMemOperand::MOVolatile; + + MachineMemOperand *MMO = + MF.getMachineMemOperand(PtrVal, Flags, 0, + MemVT.getStoreSize(), Alignment); + + return getAtomic(Opcode, dl, MemVT, Chain, Ptr, Cmp, Swp, MMO); +} + +SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, + SDValue Chain, + SDValue Ptr, SDValue Cmp, + SDValue Swp, MachineMemOperand *MMO) { assert(Opcode == ISD::ATOMIC_CMP_SWAP && "Invalid Atomic Op"); assert(Cmp.getValueType() == Swp.getValueType() && "Invalid Atomic Op Types"); - MVT VT = Cmp.getValueType(); - - if (Alignment == 0) // Ensure that codegen never sees alignment 0 - Alignment = getMVTAlignment(MemVT); + EVT VT = Cmp.getValueType(); SDVTList VTs = getVTList(VT, MVT::Other); FoldingSetNodeID ID; @@ -3498,21 +3543,48 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, MVT MemVT, SDValue Ops[] = {Chain, Ptr, Cmp, Swp}; AddNodeIDNode(ID, Opcode, VTs, Ops, 4); void* IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + cast<AtomicSDNode>(E)->refineAlignment(MMO); return SDValue(E, 0); + } SDNode* N = NodeAllocator.Allocate<AtomicSDNode>(); - new (N) AtomicSDNode(Opcode, dl, VTs, MemVT, - Chain, Ptr, Cmp, Swp, PtrVal, Alignment); + new (N) AtomicSDNode(Opcode, dl, VTs, MemVT, Chain, Ptr, Cmp, Swp, MMO); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); } -SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, MVT MemVT, +SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Val, const Value* PtrVal, unsigned Alignment) { + if (Alignment == 0) // Ensure that codegen never sees alignment 0 + Alignment = getEVTAlignment(MemVT); + + // Check if the memory reference references a frame index + if (!PtrVal) + if (const FrameIndexSDNode *FI = + dyn_cast<const FrameIndexSDNode>(Ptr.getNode())) + PtrVal = 
PseudoSourceValue::getFixedStack(FI->getIndex()); + + MachineFunction &MF = getMachineFunction(); + unsigned Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore; + + // For now, atomics are considered to be volatile always. + Flags |= MachineMemOperand::MOVolatile; + + MachineMemOperand *MMO = + MF.getMachineMemOperand(PtrVal, Flags, 0, + MemVT.getStoreSize(), Alignment); + + return getAtomic(Opcode, dl, MemVT, Chain, Ptr, Val, MMO); +} + +SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, + SDValue Chain, + SDValue Ptr, SDValue Val, + MachineMemOperand *MMO) { assert((Opcode == ISD::ATOMIC_LOAD_ADD || Opcode == ISD::ATOMIC_LOAD_SUB || Opcode == ISD::ATOMIC_LOAD_AND || @@ -3526,10 +3598,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, MVT MemVT, Opcode == ISD::ATOMIC_SWAP) && "Invalid Atomic Op"); - MVT VT = Val.getValueType(); - - if (Alignment == 0) // Ensure that codegen never sees alignment 0 - Alignment = getMVTAlignment(MemVT); + EVT VT = Val.getValueType(); SDVTList VTs = getVTList(VT, MVT::Other); FoldingSetNodeID ID; @@ -3537,11 +3606,12 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, MVT MemVT, SDValue Ops[] = {Chain, Ptr, Val}; AddNodeIDNode(ID, Opcode, VTs, Ops, 3); void* IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + cast<AtomicSDNode>(E)->refineAlignment(MMO); return SDValue(E, 0); + } SDNode* N = NodeAllocator.Allocate<AtomicSDNode>(); - new (N) AtomicSDNode(Opcode, dl, VTs, MemVT, - Chain, Ptr, Val, PtrVal, Alignment); + new (N) AtomicSDNode(Opcode, dl, VTs, MemVT, Chain, Ptr, Val, MMO); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); @@ -3554,7 +3624,7 @@ SDValue SelectionDAG::getMergeValues(const SDValue *Ops, unsigned NumOps, if (NumOps == 1) return Ops[0]; - SmallVector<MVT, 4> VTs; + SmallVector<EVT, 4> VTs; VTs.reserve(NumOps); for (unsigned i = 0; i < NumOps; ++i) 
VTs.push_back(Ops[i].getValueType()); @@ -3564,9 +3634,9 @@ SDValue SelectionDAG::getMergeValues(const SDValue *Ops, unsigned NumOps, SDValue SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, - const MVT *VTs, unsigned NumVTs, + const EVT *VTs, unsigned NumVTs, const SDValue *Ops, unsigned NumOps, - MVT MemVT, const Value *srcValue, int SVOff, + EVT MemVT, const Value *srcValue, int SVOff, unsigned Align, bool Vol, bool ReadMem, bool WriteMem) { return getMemIntrinsicNode(Opcode, dl, makeVTList(VTs, NumVTs), Ops, NumOps, @@ -3577,81 +3647,104 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDValue SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList, const SDValue *Ops, unsigned NumOps, - MVT MemVT, const Value *srcValue, int SVOff, + EVT MemVT, const Value *srcValue, int SVOff, unsigned Align, bool Vol, bool ReadMem, bool WriteMem) { + if (Align == 0) // Ensure that codegen never sees alignment 0 + Align = getEVTAlignment(MemVT); + + MachineFunction &MF = getMachineFunction(); + unsigned Flags = 0; + if (WriteMem) + Flags |= MachineMemOperand::MOStore; + if (ReadMem) + Flags |= MachineMemOperand::MOLoad; + if (Vol) + Flags |= MachineMemOperand::MOVolatile; + MachineMemOperand *MMO = + MF.getMachineMemOperand(srcValue, Flags, SVOff, + MemVT.getStoreSize(), Align); + + return getMemIntrinsicNode(Opcode, dl, VTList, Ops, NumOps, MemVT, MMO); +} + +SDValue +SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList, + const SDValue *Ops, unsigned NumOps, + EVT MemVT, MachineMemOperand *MMO) { + assert((Opcode == ISD::INTRINSIC_VOID || + Opcode == ISD::INTRINSIC_W_CHAIN || + (Opcode <= INT_MAX && + (int)Opcode >= ISD::FIRST_TARGET_MEMORY_OPCODE)) && + "Opcode is not a memory-accessing opcode!"); + // Memoize the node unless it returns a flag. 
MemIntrinsicSDNode *N; if (VTList.VTs[VTList.NumVTs-1] != MVT::Flag) { FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + cast<MemIntrinsicSDNode>(E)->refineAlignment(MMO); return SDValue(E, 0); + } N = NodeAllocator.Allocate<MemIntrinsicSDNode>(); - new (N) MemIntrinsicSDNode(Opcode, dl, VTList, Ops, NumOps, MemVT, - srcValue, SVOff, Align, Vol, ReadMem, WriteMem); + new (N) MemIntrinsicSDNode(Opcode, dl, VTList, Ops, NumOps, MemVT, MMO); CSEMap.InsertNode(N, IP); } else { N = NodeAllocator.Allocate<MemIntrinsicSDNode>(); - new (N) MemIntrinsicSDNode(Opcode, dl, VTList, Ops, NumOps, MemVT, - srcValue, SVOff, Align, Vol, ReadMem, WriteMem); - } - AllNodes.push_back(N); - return SDValue(N, 0); -} - -SDValue -SelectionDAG::getCall(unsigned CallingConv, DebugLoc dl, bool IsVarArgs, - bool IsTailCall, bool IsInreg, SDVTList VTs, - const SDValue *Operands, unsigned NumOperands, - unsigned NumFixedArgs) { - // Do not include isTailCall in the folding set profile. - FoldingSetNodeID ID; - AddNodeIDNode(ID, ISD::CALL, VTs, Operands, NumOperands); - ID.AddInteger(CallingConv); - ID.AddInteger(IsVarArgs); - void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { - // Instead of including isTailCall in the folding set, we just - // set the flag of the existing node. 
- if (!IsTailCall) - cast<CallSDNode>(E)->setNotTailCall(); - return SDValue(E, 0); + new (N) MemIntrinsicSDNode(Opcode, dl, VTList, Ops, NumOps, MemVT, MMO); } - SDNode *N = NodeAllocator.Allocate<CallSDNode>(); - new (N) CallSDNode(CallingConv, dl, IsVarArgs, IsTailCall, IsInreg, - VTs, Operands, NumOperands, NumFixedArgs); - CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); } SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, DebugLoc dl, - ISD::LoadExtType ExtType, MVT VT, SDValue Chain, + ISD::LoadExtType ExtType, EVT VT, SDValue Chain, SDValue Ptr, SDValue Offset, - const Value *SV, int SVOffset, MVT EVT, + const Value *SV, int SVOffset, EVT MemVT, bool isVolatile, unsigned Alignment) { if (Alignment == 0) // Ensure that codegen never sees alignment 0 - Alignment = getMVTAlignment(VT); + Alignment = getEVTAlignment(VT); + + // Check if the memory reference references a frame index + if (!SV) + if (const FrameIndexSDNode *FI = + dyn_cast<const FrameIndexSDNode>(Ptr.getNode())) + SV = PseudoSourceValue::getFixedStack(FI->getIndex()); + + MachineFunction &MF = getMachineFunction(); + unsigned Flags = MachineMemOperand::MOLoad; + if (isVolatile) + Flags |= MachineMemOperand::MOVolatile; + MachineMemOperand *MMO = + MF.getMachineMemOperand(SV, Flags, SVOffset, + MemVT.getStoreSize(), Alignment); + return getLoad(AM, dl, ExtType, VT, Chain, Ptr, Offset, MemVT, MMO); +} - if (VT == EVT) { +SDValue +SelectionDAG::getLoad(ISD::MemIndexedMode AM, DebugLoc dl, + ISD::LoadExtType ExtType, EVT VT, SDValue Chain, + SDValue Ptr, SDValue Offset, EVT MemVT, + MachineMemOperand *MMO) { + if (VT == MemVT) { ExtType = ISD::NON_EXTLOAD; } else if (ExtType == ISD::NON_EXTLOAD) { - assert(VT == EVT && "Non-extending load from different memory type!"); + assert(VT == MemVT && "Non-extending load from different memory type!"); } else { // Extending load. 
if (VT.isVector()) - assert(EVT.getVectorNumElements() == VT.getVectorNumElements() && + assert(MemVT.getVectorNumElements() == VT.getVectorNumElements() && "Invalid vector extload!"); else - assert(EVT.bitsLT(VT) && + assert(MemVT.bitsLT(VT) && "Should only be an extending load, not truncating!"); assert((ExtType == ISD::EXTLOAD || VT.isInteger()) && "Cannot sign/zero extend a FP/Vector load!"); - assert(VT.isInteger() == EVT.isInteger() && + assert(VT.isInteger() == MemVT.isInteger() && "Cannot convert from FP to Int or Int -> FP!"); } @@ -3664,20 +3757,21 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, DebugLoc dl, SDValue Ops[] = { Chain, Ptr, Offset }; FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::LOAD, VTs, Ops, 3); - ID.AddInteger(EVT.getRawBits()); - ID.AddInteger(encodeMemSDNodeFlags(ExtType, AM, isVolatile, Alignment)); + ID.AddInteger(MemVT.getRawBits()); + ID.AddInteger(encodeMemSDNodeFlags(ExtType, AM, MMO->isVolatile())); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + cast<LoadSDNode>(E)->refineAlignment(MMO); return SDValue(E, 0); + } SDNode *N = NodeAllocator.Allocate<LoadSDNode>(); - new (N) LoadSDNode(Ops, dl, VTs, AM, ExtType, EVT, SV, SVOffset, - Alignment, isVolatile); + new (N) LoadSDNode(Ops, dl, VTs, AM, ExtType, MemVT, MMO); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); } -SDValue SelectionDAG::getLoad(MVT VT, DebugLoc dl, +SDValue SelectionDAG::getLoad(EVT VT, DebugLoc dl, SDValue Chain, SDValue Ptr, const Value *SV, int SVOffset, bool isVolatile, unsigned Alignment) { @@ -3686,14 +3780,14 @@ SDValue SelectionDAG::getLoad(MVT VT, DebugLoc dl, SV, SVOffset, VT, isVolatile, Alignment); } -SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, DebugLoc dl, MVT VT, +SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, DebugLoc dl, EVT VT, SDValue Chain, SDValue Ptr, const Value *SV, - int SVOffset, MVT EVT, + int SVOffset, EVT 
MemVT, bool isVolatile, unsigned Alignment) { SDValue Undef = getUNDEF(Ptr.getValueType()); return getLoad(ISD::UNINDEXED, dl, ExtType, VT, Chain, Ptr, Undef, - SV, SVOffset, EVT, isVolatile, Alignment); + SV, SVOffset, MemVT, isVolatile, Alignment); } SDValue @@ -3711,25 +3805,43 @@ SelectionDAG::getIndexedLoad(SDValue OrigLoad, DebugLoc dl, SDValue Base, SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val, SDValue Ptr, const Value *SV, int SVOffset, bool isVolatile, unsigned Alignment) { - MVT VT = Val.getValueType(); - if (Alignment == 0) // Ensure that codegen never sees alignment 0 - Alignment = getMVTAlignment(VT); + Alignment = getEVTAlignment(Val.getValueType()); + // Check if the memory reference references a frame index + if (!SV) + if (const FrameIndexSDNode *FI = + dyn_cast<const FrameIndexSDNode>(Ptr.getNode())) + SV = PseudoSourceValue::getFixedStack(FI->getIndex()); + + MachineFunction &MF = getMachineFunction(); + unsigned Flags = MachineMemOperand::MOStore; + if (isVolatile) + Flags |= MachineMemOperand::MOVolatile; + MachineMemOperand *MMO = + MF.getMachineMemOperand(SV, Flags, SVOffset, + Val.getValueType().getStoreSize(), Alignment); + + return getStore(Chain, dl, Val, Ptr, MMO); +} + +SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val, + SDValue Ptr, MachineMemOperand *MMO) { + EVT VT = Val.getValueType(); SDVTList VTs = getVTList(MVT::Other); SDValue Undef = getUNDEF(Ptr.getValueType()); SDValue Ops[] = { Chain, Val, Ptr, Undef }; FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4); ID.AddInteger(VT.getRawBits()); - ID.AddInteger(encodeMemSDNodeFlags(false, ISD::UNINDEXED, - isVolatile, Alignment)); + ID.AddInteger(encodeMemSDNodeFlags(false, ISD::UNINDEXED, MMO->isVolatile())); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + cast<StoreSDNode>(E)->refineAlignment(MMO); return SDValue(E, 0); + } SDNode *N = 
NodeAllocator.Allocate<StoreSDNode>(); - new (N) StoreSDNode(Ops, dl, VTs, ISD::UNINDEXED, false, - VT, SV, SVOffset, Alignment, isVolatile); + new (N) StoreSDNode(Ops, dl, VTs, ISD::UNINDEXED, false, VT, MMO); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); @@ -3737,19 +3849,39 @@ SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val, SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val, SDValue Ptr, const Value *SV, - int SVOffset, MVT SVT, + int SVOffset, EVT SVT, bool isVolatile, unsigned Alignment) { - MVT VT = Val.getValueType(); + if (Alignment == 0) // Ensure that codegen never sees alignment 0 + Alignment = getEVTAlignment(SVT); + + // Check if the memory reference references a frame index + if (!SV) + if (const FrameIndexSDNode *FI = + dyn_cast<const FrameIndexSDNode>(Ptr.getNode())) + SV = PseudoSourceValue::getFixedStack(FI->getIndex()); + + MachineFunction &MF = getMachineFunction(); + unsigned Flags = MachineMemOperand::MOStore; + if (isVolatile) + Flags |= MachineMemOperand::MOVolatile; + MachineMemOperand *MMO = + MF.getMachineMemOperand(SV, Flags, SVOffset, SVT.getStoreSize(), Alignment); + + return getTruncStore(Chain, dl, Val, Ptr, SVT, MMO); +} + +SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val, + SDValue Ptr, EVT SVT, + MachineMemOperand *MMO) { + EVT VT = Val.getValueType(); if (VT == SVT) - return getStore(Chain, dl, Val, Ptr, SV, SVOffset, isVolatile, Alignment); + return getStore(Chain, dl, Val, Ptr, MMO); assert(VT.bitsGT(SVT) && "Not a truncation?"); assert(VT.isInteger() == SVT.isInteger() && "Can't do FP-INT conversion!"); - if (Alignment == 0) // Ensure that codegen never sees alignment 0 - Alignment = getMVTAlignment(VT); SDVTList VTs = getVTList(MVT::Other); SDValue Undef = getUNDEF(Ptr.getValueType()); @@ -3757,14 +3889,14 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val, FoldingSetNodeID ID; AddNodeIDNode(ID, 
ISD::STORE, VTs, Ops, 4); ID.AddInteger(SVT.getRawBits()); - ID.AddInteger(encodeMemSDNodeFlags(true, ISD::UNINDEXED, - isVolatile, Alignment)); + ID.AddInteger(encodeMemSDNodeFlags(true, ISD::UNINDEXED, MMO->isVolatile())); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + cast<StoreSDNode>(E)->refineAlignment(MMO); return SDValue(E, 0); + } SDNode *N = NodeAllocator.Allocate<StoreSDNode>(); - new (N) StoreSDNode(Ops, dl, VTs, ISD::UNINDEXED, true, - SVT, SV, SVOffset, Alignment, isVolatile); + new (N) StoreSDNode(Ops, dl, VTs, ISD::UNINDEXED, true, SVT, MMO); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); @@ -3788,21 +3920,20 @@ SelectionDAG::getIndexedStore(SDValue OrigStore, DebugLoc dl, SDValue Base, SDNode *N = NodeAllocator.Allocate<StoreSDNode>(); new (N) StoreSDNode(Ops, dl, VTs, AM, ST->isTruncatingStore(), ST->getMemoryVT(), - ST->getSrcValue(), ST->getSrcValueOffset(), - ST->getAlignment(), ST->isVolatile()); + ST->getMemOperand()); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); } -SDValue SelectionDAG::getVAArg(MVT VT, DebugLoc dl, +SDValue SelectionDAG::getVAArg(EVT VT, DebugLoc dl, SDValue Chain, SDValue Ptr, SDValue SV) { SDValue Ops[] = { Chain, Ptr, SV }; return getNode(ISD::VAARG, dl, getVTList(VT, MVT::Other), Ops, 3); } -SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT, +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, const SDUse *Ops, unsigned NumOps) { switch (NumOps) { case 0: return getNode(Opcode, DL, VT); @@ -3818,7 +3949,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT, return getNode(Opcode, DL, VT, &NewOps[0], NumOps); } -SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT, +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, const SDValue *Ops, unsigned NumOps) { switch (NumOps) { case 0: return getNode(Opcode, DL, VT); @@ 
-3876,14 +4007,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT, } SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, - const std::vector<MVT> &ResultTys, + const std::vector<EVT> &ResultTys, const SDValue *Ops, unsigned NumOps) { return getNode(Opcode, DL, getVTList(&ResultTys[0], ResultTys.size()), Ops, NumOps); } SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, - const MVT *VTs, unsigned NumVTs, + const EVT *VTs, unsigned NumVTs, const SDValue *Ops, unsigned NumOps) { if (NumVTs == 1) return getNode(Opcode, DL, VTs[0], Ops, NumOps); @@ -3895,11 +4026,11 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList, if (VTList.NumVTs == 1) return getNode(Opcode, DL, VTList.VTs[0], Ops, NumOps); +#if 0 switch (Opcode) { // FIXME: figure out how to safely handle things like // int foo(int x) { return 1 << (x & 255); } // int bar() { return foo(256); } -#if 0 case ISD::SRA_PARTS: case ISD::SRL_PARTS: case ISD::SHL_PARTS: @@ -3915,8 +4046,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList, return getNode(Opcode, DL, VT, N1, N2, N3.getOperand(0)); } break; -#endif } +#endif // Memoize the node unless it returns a flag. 
SDNode *N; @@ -3998,17 +4129,17 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList, return getNode(Opcode, DL, VTList, Ops, 5); } -SDVTList SelectionDAG::getVTList(MVT VT) { +SDVTList SelectionDAG::getVTList(EVT VT) { return makeVTList(SDNode::getValueTypeList(VT), 1); } -SDVTList SelectionDAG::getVTList(MVT VT1, MVT VT2) { +SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2) { for (std::vector<SDVTList>::reverse_iterator I = VTList.rbegin(), E = VTList.rend(); I != E; ++I) if (I->NumVTs == 2 && I->VTs[0] == VT1 && I->VTs[1] == VT2) return *I; - MVT *Array = Allocator.Allocate<MVT>(2); + EVT *Array = Allocator.Allocate<EVT>(2); Array[0] = VT1; Array[1] = VT2; SDVTList Result = makeVTList(Array, 2); @@ -4016,14 +4147,14 @@ SDVTList SelectionDAG::getVTList(MVT VT1, MVT VT2) { return Result; } -SDVTList SelectionDAG::getVTList(MVT VT1, MVT VT2, MVT VT3) { +SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2, EVT VT3) { for (std::vector<SDVTList>::reverse_iterator I = VTList.rbegin(), E = VTList.rend(); I != E; ++I) if (I->NumVTs == 3 && I->VTs[0] == VT1 && I->VTs[1] == VT2 && I->VTs[2] == VT3) return *I; - MVT *Array = Allocator.Allocate<MVT>(3); + EVT *Array = Allocator.Allocate<EVT>(3); Array[0] = VT1; Array[1] = VT2; Array[2] = VT3; @@ -4032,14 +4163,14 @@ SDVTList SelectionDAG::getVTList(MVT VT1, MVT VT2, MVT VT3) { return Result; } -SDVTList SelectionDAG::getVTList(MVT VT1, MVT VT2, MVT VT3, MVT VT4) { +SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2, EVT VT3, EVT VT4) { for (std::vector<SDVTList>::reverse_iterator I = VTList.rbegin(), E = VTList.rend(); I != E; ++I) if (I->NumVTs == 4 && I->VTs[0] == VT1 && I->VTs[1] == VT2 && I->VTs[2] == VT3 && I->VTs[3] == VT4) return *I; - MVT *Array = Allocator.Allocate<MVT>(3); + EVT *Array = Allocator.Allocate<EVT>(3); Array[0] = VT1; Array[1] = VT2; Array[2] = VT3; @@ -4049,9 +4180,9 @@ SDVTList SelectionDAG::getVTList(MVT VT1, MVT VT2, MVT VT3, MVT VT4) { return Result; } -SDVTList 
SelectionDAG::getVTList(const MVT *VTs, unsigned NumVTs) { +SDVTList SelectionDAG::getVTList(const EVT *VTs, unsigned NumVTs) { switch (NumVTs) { - case 0: assert(0 && "Cannot have nodes without results!"); + case 0: llvm_unreachable("Cannot have nodes without results!"); case 1: return getVTList(VTs[0]); case 2: return getVTList(VTs[0], VTs[1]); case 3: return getVTList(VTs[0], VTs[1], VTs[2]); @@ -4073,7 +4204,7 @@ SDVTList SelectionDAG::getVTList(const MVT *VTs, unsigned NumVTs) { return *I; } - MVT *Array = Allocator.Allocate<MVT>(NumVTs); + EVT *Array = Allocator.Allocate<EVT>(NumVTs); std::copy(VTs, VTs+NumVTs, Array); SDVTList Result = makeVTList(Array, NumVTs); VTList.push_back(Result); @@ -4215,20 +4346,20 @@ void SDNode::DropOperands() { /// machine opcode. /// SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, - MVT VT) { + EVT VT) { SDVTList VTs = getVTList(VT); return SelectNodeTo(N, MachineOpc, VTs, 0, 0); } SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, - MVT VT, SDValue Op1) { + EVT VT, SDValue Op1) { SDVTList VTs = getVTList(VT); SDValue Ops[] = { Op1 }; return SelectNodeTo(N, MachineOpc, VTs, Ops, 1); } SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, - MVT VT, SDValue Op1, + EVT VT, SDValue Op1, SDValue Op2) { SDVTList VTs = getVTList(VT); SDValue Ops[] = { Op1, Op2 }; @@ -4236,7 +4367,7 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, } SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, - MVT VT, SDValue Op1, + EVT VT, SDValue Op1, SDValue Op2, SDValue Op3) { SDVTList VTs = getVTList(VT); SDValue Ops[] = { Op1, Op2, Op3 }; @@ -4244,41 +4375,41 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, } SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, - MVT VT, const SDValue *Ops, + EVT VT, const SDValue *Ops, unsigned NumOps) { SDVTList VTs = getVTList(VT); return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps); } SDNode 
*SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, - MVT VT1, MVT VT2, const SDValue *Ops, + EVT VT1, EVT VT2, const SDValue *Ops, unsigned NumOps) { SDVTList VTs = getVTList(VT1, VT2); return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps); } SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, - MVT VT1, MVT VT2) { + EVT VT1, EVT VT2) { SDVTList VTs = getVTList(VT1, VT2); return SelectNodeTo(N, MachineOpc, VTs, (SDValue *)0, 0); } SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, - MVT VT1, MVT VT2, MVT VT3, + EVT VT1, EVT VT2, EVT VT3, const SDValue *Ops, unsigned NumOps) { SDVTList VTs = getVTList(VT1, VT2, VT3); return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps); } SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, - MVT VT1, MVT VT2, MVT VT3, MVT VT4, + EVT VT1, EVT VT2, EVT VT3, EVT VT4, const SDValue *Ops, unsigned NumOps) { SDVTList VTs = getVTList(VT1, VT2, VT3, VT4); return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps); } SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, - MVT VT1, MVT VT2, + EVT VT1, EVT VT2, SDValue Op1) { SDVTList VTs = getVTList(VT1, VT2); SDValue Ops[] = { Op1 }; @@ -4286,7 +4417,7 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, } SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, - MVT VT1, MVT VT2, + EVT VT1, EVT VT2, SDValue Op1, SDValue Op2) { SDVTList VTs = getVTList(VT1, VT2); SDValue Ops[] = { Op1, Op2 }; @@ -4294,7 +4425,7 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, } SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, - MVT VT1, MVT VT2, + EVT VT1, EVT VT2, SDValue Op1, SDValue Op2, SDValue Op3) { SDVTList VTs = getVTList(VT1, VT2); @@ -4303,7 +4434,7 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, } SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, - MVT VT1, MVT VT2, MVT VT3, + EVT VT1, EVT VT2, EVT VT3, SDValue Op1, SDValue Op2, 
SDValue Op3) { SDVTList VTs = getVTList(VT1, VT2, VT3); @@ -4318,20 +4449,20 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, } SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, - MVT VT) { + EVT VT) { SDVTList VTs = getVTList(VT); return MorphNodeTo(N, Opc, VTs, 0, 0); } SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, - MVT VT, SDValue Op1) { + EVT VT, SDValue Op1) { SDVTList VTs = getVTList(VT); SDValue Ops[] = { Op1 }; return MorphNodeTo(N, Opc, VTs, Ops, 1); } SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, - MVT VT, SDValue Op1, + EVT VT, SDValue Op1, SDValue Op2) { SDVTList VTs = getVTList(VT); SDValue Ops[] = { Op1, Op2 }; @@ -4339,7 +4470,7 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, } SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, - MVT VT, SDValue Op1, + EVT VT, SDValue Op1, SDValue Op2, SDValue Op3) { SDVTList VTs = getVTList(VT); SDValue Ops[] = { Op1, Op2, Op3 }; @@ -4347,34 +4478,34 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, } SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, - MVT VT, const SDValue *Ops, + EVT VT, const SDValue *Ops, unsigned NumOps) { SDVTList VTs = getVTList(VT); return MorphNodeTo(N, Opc, VTs, Ops, NumOps); } SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, - MVT VT1, MVT VT2, const SDValue *Ops, + EVT VT1, EVT VT2, const SDValue *Ops, unsigned NumOps) { SDVTList VTs = getVTList(VT1, VT2); return MorphNodeTo(N, Opc, VTs, Ops, NumOps); } SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, - MVT VT1, MVT VT2) { + EVT VT1, EVT VT2) { SDVTList VTs = getVTList(VT1, VT2); return MorphNodeTo(N, Opc, VTs, (SDValue *)0, 0); } SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, - MVT VT1, MVT VT2, MVT VT3, + EVT VT1, EVT VT2, EVT VT3, const SDValue *Ops, unsigned NumOps) { SDVTList VTs = getVTList(VT1, VT2, VT3); return MorphNodeTo(N, Opc, VTs, Ops, NumOps); } SDNode *SelectionDAG::MorphNodeTo(SDNode *N, 
unsigned Opc, - MVT VT1, MVT VT2, + EVT VT1, EVT VT2, SDValue Op1) { SDVTList VTs = getVTList(VT1, VT2); SDValue Ops[] = { Op1 }; @@ -4382,7 +4513,7 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, } SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, - MVT VT1, MVT VT2, + EVT VT1, EVT VT2, SDValue Op1, SDValue Op2) { SDVTList VTs = getVTList(VT1, VT2); SDValue Ops[] = { Op1, Op2 }; @@ -4390,7 +4521,7 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, } SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, - MVT VT1, MVT VT2, + EVT VT1, EVT VT2, SDValue Op1, SDValue Op2, SDValue Op3) { SDVTList VTs = getVTList(VT1, VT2); @@ -4441,29 +4572,35 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, DeadNodeSet.insert(Used); } - // If NumOps is larger than the # of operands we currently have, reallocate - // the operand list. - if (NumOps > N->NumOperands) { - if (N->OperandsNeedDelete) - delete[] N->OperandList; - - if (N->isMachineOpcode()) { - // We're creating a final node that will live unmorphed for the - // remainder of the current SelectionDAG iteration, so we can allocate - // the operands directly out of a pool with no recycling metadata. - N->OperandList = OperandAllocator.Allocate<SDUse>(NumOps); - N->OperandsNeedDelete = false; - } else { - N->OperandList = new SDUse[NumOps]; + if (MachineSDNode *MN = dyn_cast<MachineSDNode>(N)) { + // Initialize the memory references information. + MN->setMemRefs(0, 0); + // If NumOps is larger than the # of operands we can have in a + // MachineSDNode, reallocate the operand list. + if (NumOps > MN->NumOperands || !MN->OperandsNeedDelete) { + if (MN->OperandsNeedDelete) + delete[] MN->OperandList; + if (NumOps > array_lengthof(MN->LocalOperands)) + // We're creating a final node that will live unmorphed for the + // remainder of the current SelectionDAG iteration, so we can allocate + // the operands directly out of a pool with no recycling metadata. 
+ MN->InitOperands(OperandAllocator.Allocate<SDUse>(NumOps), + Ops, NumOps); + else + MN->InitOperands(MN->LocalOperands, Ops, NumOps); + MN->OperandsNeedDelete = false; + } else + MN->InitOperands(MN->OperandList, Ops, NumOps); + } else { + // If NumOps is larger than the # of operands we currently have, reallocate + // the operand list. + if (NumOps > N->NumOperands) { + if (N->OperandsNeedDelete) + delete[] N->OperandList; + N->InitOperands(new SDUse[NumOps], Ops, NumOps); N->OperandsNeedDelete = true; - } - } - - // Assign the new operands. - N->NumOperands = NumOps; - for (unsigned i = 0, e = NumOps; i != e; ++i) { - N->OperandList[i].setUser(N); - N->OperandList[i].setInitial(Ops[i]); + } else + MN->InitOperands(MN->OperandList, Ops, NumOps); } // Delete any nodes that are still dead after adding the uses for the @@ -4481,108 +4618,189 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, } -/// getTargetNode - These are used for target selectors to create a new node -/// with specified return type(s), target opcode, and operands. +/// getMachineNode - These are used for target selectors to create a new node +/// with specified return type(s), MachineInstr opcode, and operands. /// -/// Note that getTargetNode returns the resultant node. If there is already a +/// Note that getMachineNode returns the resultant node. If there is already a /// node of the specified opcode and operands, it returns that node instead of /// the current one. 
-SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT) { - return getNode(~Opcode, dl, VT).getNode(); +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT) { + SDVTList VTs = getVTList(VT); + return getMachineNode(Opcode, dl, VTs, 0, 0); } -SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT, - SDValue Op1) { - return getNode(~Opcode, dl, VT, Op1).getNode(); +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT, SDValue Op1) { + SDVTList VTs = getVTList(VT); + SDValue Ops[] = { Op1 }; + return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops)); } -SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT, - SDValue Op1, SDValue Op2) { - return getNode(~Opcode, dl, VT, Op1, Op2).getNode(); +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT, + SDValue Op1, SDValue Op2) { + SDVTList VTs = getVTList(VT); + SDValue Ops[] = { Op1, Op2 }; + return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops)); } -SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT, - SDValue Op1, SDValue Op2, - SDValue Op3) { - return getNode(~Opcode, dl, VT, Op1, Op2, Op3).getNode(); +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT, + SDValue Op1, SDValue Op2, SDValue Op3) { + SDVTList VTs = getVTList(VT); + SDValue Ops[] = { Op1, Op2, Op3 }; + return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops)); } -SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT, - const SDValue *Ops, unsigned NumOps) { - return getNode(~Opcode, dl, VT, Ops, NumOps).getNode(); +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT, + const SDValue *Ops, unsigned NumOps) { + SDVTList VTs = getVTList(VT); + return getMachineNode(Opcode, dl, VTs, Ops, NumOps); } -SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, - MVT VT1, MVT VT2) { 
+MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, EVT VT2) { SDVTList VTs = getVTList(VT1, VT2); - SDValue Op; - return getNode(~Opcode, dl, VTs, &Op, 0).getNode(); + return getMachineNode(Opcode, dl, VTs, 0, 0); } -SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT1, - MVT VT2, SDValue Op1) { +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, + EVT VT1, EVT VT2, SDValue Op1) { SDVTList VTs = getVTList(VT1, VT2); - return getNode(~Opcode, dl, VTs, &Op1, 1).getNode(); + SDValue Ops[] = { Op1 }; + return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops)); } -SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT1, - MVT VT2, SDValue Op1, - SDValue Op2) { +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, + EVT VT1, EVT VT2, SDValue Op1, SDValue Op2) { SDVTList VTs = getVTList(VT1, VT2); SDValue Ops[] = { Op1, Op2 }; - return getNode(~Opcode, dl, VTs, Ops, 2).getNode(); + return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops)); } -SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT1, - MVT VT2, SDValue Op1, - SDValue Op2, SDValue Op3) { +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, + EVT VT1, EVT VT2, SDValue Op1, + SDValue Op2, SDValue Op3) { SDVTList VTs = getVTList(VT1, VT2); SDValue Ops[] = { Op1, Op2, Op3 }; - return getNode(~Opcode, dl, VTs, Ops, 3).getNode(); + return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops)); } -SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, - MVT VT1, MVT VT2, - const SDValue *Ops, unsigned NumOps) { +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, + EVT VT1, EVT VT2, + const SDValue *Ops, unsigned NumOps) { SDVTList VTs = getVTList(VT1, VT2); - return getNode(~Opcode, dl, VTs, Ops, NumOps).getNode(); + return getMachineNode(Opcode, dl, VTs, Ops, NumOps); } -SDNode 
*SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, - MVT VT1, MVT VT2, MVT VT3, - SDValue Op1, SDValue Op2) { +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, + EVT VT1, EVT VT2, EVT VT3, + SDValue Op1, SDValue Op2) { SDVTList VTs = getVTList(VT1, VT2, VT3); SDValue Ops[] = { Op1, Op2 }; - return getNode(~Opcode, dl, VTs, Ops, 2).getNode(); + return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops)); } -SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, - MVT VT1, MVT VT2, MVT VT3, - SDValue Op1, SDValue Op2, - SDValue Op3) { +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, + EVT VT1, EVT VT2, EVT VT3, + SDValue Op1, SDValue Op2, SDValue Op3) { SDVTList VTs = getVTList(VT1, VT2, VT3); SDValue Ops[] = { Op1, Op2, Op3 }; - return getNode(~Opcode, dl, VTs, Ops, 3).getNode(); + return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops)); } -SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, - MVT VT1, MVT VT2, MVT VT3, - const SDValue *Ops, unsigned NumOps) { +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, + EVT VT1, EVT VT2, EVT VT3, + const SDValue *Ops, unsigned NumOps) { SDVTList VTs = getVTList(VT1, VT2, VT3); - return getNode(~Opcode, dl, VTs, Ops, NumOps).getNode(); + return getMachineNode(Opcode, dl, VTs, Ops, NumOps); } -SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT1, - MVT VT2, MVT VT3, MVT VT4, - const SDValue *Ops, unsigned NumOps) { +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, + EVT VT2, EVT VT3, EVT VT4, + const SDValue *Ops, unsigned NumOps) { SDVTList VTs = getVTList(VT1, VT2, VT3, VT4); - return getNode(~Opcode, dl, VTs, Ops, NumOps).getNode(); + return getMachineNode(Opcode, dl, VTs, Ops, NumOps); } -SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, - const std::vector<MVT> &ResultTys, - const SDValue *Ops, unsigned NumOps) { - return 
getNode(~Opcode, dl, ResultTys, Ops, NumOps).getNode(); +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, + const std::vector<EVT> &ResultTys, + const SDValue *Ops, unsigned NumOps) { + SDVTList VTs = getVTList(&ResultTys[0], ResultTys.size()); + return getMachineNode(Opcode, dl, VTs, Ops, NumOps); +} + +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc DL, SDVTList VTs, + const SDValue *Ops, unsigned NumOps) { + bool DoCSE = VTs.VTs[VTs.NumVTs-1] != MVT::Flag; + MachineSDNode *N; + void *IP; + + if (DoCSE) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, ~Opcode, VTs, Ops, NumOps); + IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return cast<MachineSDNode>(E); + } + + // Allocate a new MachineSDNode. + N = NodeAllocator.Allocate<MachineSDNode>(); + new (N) MachineSDNode(~Opcode, DL, VTs); + + // Initialize the operands list. + if (NumOps > array_lengthof(N->LocalOperands)) + // We're creating a final node that will live unmorphed for the + // remainder of the current SelectionDAG iteration, so we can allocate + // the operands directly out of a pool with no recycling metadata. + N->InitOperands(OperandAllocator.Allocate<SDUse>(NumOps), + Ops, NumOps); + else + N->InitOperands(N->LocalOperands, Ops, NumOps); + N->OperandsNeedDelete = false; + + if (DoCSE) + CSEMap.InsertNode(N, IP); + + AllNodes.push_back(N); +#ifndef NDEBUG + VerifyNode(N); +#endif + return N; +} + +/// getTargetExtractSubreg - A convenience function for creating +/// TargetInstrInfo::EXTRACT_SUBREG nodes. +SDValue +SelectionDAG::getTargetExtractSubreg(int SRIdx, DebugLoc DL, EVT VT, + SDValue Operand) { + SDValue SRIdxVal = getTargetConstant(SRIdx, MVT::i32); + SDNode *Subreg = getMachineNode(TargetInstrInfo::EXTRACT_SUBREG, DL, + VT, Operand, SRIdxVal); + return SDValue(Subreg, 0); +} + +/// getTargetInsertSubreg - A convenience function for creating +/// TargetInstrInfo::INSERT_SUBREG nodes. 
+SDValue +SelectionDAG::getTargetInsertSubreg(int SRIdx, DebugLoc DL, EVT VT, + SDValue Operand, SDValue Subreg) { + SDValue SRIdxVal = getTargetConstant(SRIdx, MVT::i32); + SDNode *Result = getMachineNode(TargetInstrInfo::INSERT_SUBREG, DL, + VT, Operand, Subreg, SRIdxVal); + return SDValue(Result, 0); } /// getNodeIfExists - Get the specified node if it's already available, or @@ -4937,64 +5155,28 @@ HandleSDNode::~HandleSDNode() { } GlobalAddressSDNode::GlobalAddressSDNode(unsigned Opc, const GlobalValue *GA, - MVT VT, int64_t o, unsigned char TF) + EVT VT, int64_t o, unsigned char TF) : SDNode(Opc, DebugLoc::getUnknownLoc(), getSDVTList(VT)), Offset(o), TargetFlags(TF) { TheGlobal = const_cast<GlobalValue*>(GA); } -MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs, MVT memvt, - const Value *srcValue, int SVO, - unsigned alignment, bool vol) - : SDNode(Opc, dl, VTs), MemoryVT(memvt), SrcValue(srcValue), SVOffset(SVO) { - SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, vol, alignment); - assert(isPowerOf2_32(alignment) && "Alignment is not a power of 2!"); - assert(getAlignment() == alignment && "Alignment representation error!"); - assert(isVolatile() == vol && "Volatile representation error!"); +MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs, EVT memvt, + MachineMemOperand *mmo) + : SDNode(Opc, dl, VTs), MemoryVT(memvt), MMO(mmo) { + SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, MMO->isVolatile()); + assert(isVolatile() == MMO->isVolatile() && "Volatile encoding error!"); + assert(memvt.getStoreSize() == MMO->getSize() && "Size mismatch!"); } MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs, - const SDValue *Ops, - unsigned NumOps, MVT memvt, const Value *srcValue, - int SVO, unsigned alignment, bool vol) + const SDValue *Ops, unsigned NumOps, EVT memvt, + MachineMemOperand *mmo) : SDNode(Opc, dl, VTs, Ops, NumOps), - MemoryVT(memvt), SrcValue(srcValue), SVOffset(SVO) { - SubclassData = 
encodeMemSDNodeFlags(0, ISD::UNINDEXED, vol, alignment); - assert(isPowerOf2_32(alignment) && "Alignment is not a power of 2!"); - assert(getAlignment() == alignment && "Alignment representation error!"); - assert(isVolatile() == vol && "Volatile representation error!"); -} - -/// getMemOperand - Return a MachineMemOperand object describing the memory -/// reference performed by this memory reference. -MachineMemOperand MemSDNode::getMemOperand() const { - int Flags = 0; - if (isa<LoadSDNode>(this)) - Flags = MachineMemOperand::MOLoad; - else if (isa<StoreSDNode>(this)) - Flags = MachineMemOperand::MOStore; - else if (isa<AtomicSDNode>(this)) { - Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore; - } - else { - const MemIntrinsicSDNode* MemIntrinNode = dyn_cast<MemIntrinsicSDNode>(this); - assert(MemIntrinNode && "Unknown MemSDNode opcode!"); - if (MemIntrinNode->readMem()) Flags |= MachineMemOperand::MOLoad; - if (MemIntrinNode->writeMem()) Flags |= MachineMemOperand::MOStore; - } - - int Size = (getMemoryVT().getSizeInBits() + 7) >> 3; - if (isVolatile()) Flags |= MachineMemOperand::MOVolatile; - - // Check if the memory reference references a frame index - const FrameIndexSDNode *FI = - dyn_cast<const FrameIndexSDNode>(getBasePtr().getNode()); - if (!getSrcValue() && FI) - return MachineMemOperand(PseudoSourceValue::getFixedStack(FI->getIndex()), - Flags, 0, Size, getAlignment()); - else - return MachineMemOperand(getSrcValue(), Flags, getSrcValueOffset(), - Size, getAlignment()); + MemoryVT(memvt), MMO(mmo) { + SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, MMO->isVolatile()); + assert(isVolatile() == MMO->isVolatile() && "Volatile encoding error!"); + assert(memvt.getStoreSize() == MMO->getSize() && "Size mismatch!"); } /// Profile - Gather unique data for the node. 
@@ -5003,19 +5185,30 @@ void SDNode::Profile(FoldingSetNodeID &ID) const { AddNodeIDNode(ID, this); } -static ManagedStatic<std::set<MVT, MVT::compareRawBits> > EVTs; -static MVT VTs[MVT::LAST_VALUETYPE]; +namespace { + struct EVTArray { + std::vector<EVT> VTs; + + EVTArray() { + VTs.reserve(MVT::LAST_VALUETYPE); + for (unsigned i = 0; i < MVT::LAST_VALUETYPE; ++i) + VTs.push_back(MVT((MVT::SimpleValueType)i)); + } + }; +} + +static ManagedStatic<std::set<EVT, EVT::compareRawBits> > EVTs; +static ManagedStatic<EVTArray> SimpleVTArray; static ManagedStatic<sys::SmartMutex<true> > VTMutex; /// getValueTypeList - Return a pointer to the specified value type. /// -const MVT *SDNode::getValueTypeList(MVT VT) { - sys::SmartScopedLock<true> Lock(&*VTMutex); +const EVT *SDNode::getValueTypeList(EVT VT) { if (VT.isExtended()) { + sys::SmartScopedLock<true> Lock(*VTMutex); return &(*EVTs->insert(VT).first); } else { - VTs[VT.getSimpleVT()] = VT; - return &VTs[VT.getSimpleVT()]; + return &SimpleVTArray->VTs[VT.getSimpleVT().SimpleTy]; } } @@ -5186,14 +5379,12 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::PCMARKER: return "PCMarker"; case ISD::READCYCLECOUNTER: return "ReadCycleCounter"; case ISD::SRCVALUE: return "SrcValue"; - case ISD::MEMOPERAND: return "MemOperand"; case ISD::EntryToken: return "EntryToken"; case ISD::TokenFactor: return "TokenFactor"; case ISD::AssertSext: return "AssertSext"; case ISD::AssertZext: return "AssertZext"; case ISD::BasicBlock: return "BasicBlock"; - case ISD::ARG_FLAGS: return "ArgFlags"; case ISD::VALUETYPE: return "ValueType"; case ISD::Register: return "Register"; @@ -5208,6 +5399,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::FRAMEADDR: return "FRAMEADDR"; case ISD::FRAME_TO_ARGS_OFFSET: return "FRAME_TO_ARGS_OFFSET"; case ISD::EXCEPTIONADDR: return "EXCEPTIONADDR"; + case ISD::LSDAADDR: return "LSDAADDR"; case ISD::EHSELECTION: return "EHSELECTION"; case 
ISD::EH_RETURN: return "EH_RETURN"; case ISD::ConstantPool: return "ConstantPool"; @@ -5239,10 +5431,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::INLINEASM: return "inlineasm"; case ISD::DBG_LABEL: return "dbg_label"; case ISD::EH_LABEL: return "eh_label"; - case ISD::DECLARE: return "declare"; case ISD::HANDLENODE: return "handlenode"; - case ISD::FORMAL_ARGUMENTS: return "formal_arguments"; - case ISD::CALL: return "call"; // Unary operators case ISD::FABS: return "fabs"; @@ -5332,7 +5521,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::CONVERT_RNDSAT: { switch (cast<CvtRndSatSDNode>(this)->getCvtCode()) { - default: assert(0 && "Unknown cvt code!"); + default: llvm_unreachable("Unknown cvt code!"); case ISD::CVT_FF: return "cvt_ff"; case ISD::CVT_FS: return "cvt_fs"; case ISD::CVT_FU: return "cvt_fu"; @@ -5351,7 +5540,6 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::BR_JT: return "br_jt"; case ISD::BRCOND: return "brcond"; case ISD::BR_CC: return "br_cc"; - case ISD::RET: return "ret"; case ISD::CALLSEQ_START: return "callseq_start"; case ISD::CALLSEQ_END: return "callseq_end"; @@ -5384,7 +5572,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::CONDCODE: switch (cast<CondCodeSDNode>(this)->get()) { - default: assert(0 && "Unknown setcc condition!"); + default: llvm_unreachable("Unknown setcc condition!"); case ISD::SETOEQ: return "setoeq"; case ISD::SETOGT: return "setogt"; case ISD::SETOGE: return "setoge"; @@ -5463,14 +5651,26 @@ void SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const { if (getValueType(i) == MVT::Other) OS << "ch"; else - OS << getValueType(i).getMVTString(); + OS << getValueType(i).getEVTString(); } OS << " = " << getOperationName(G); } void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { - if (!isTargetOpcode() && getOpcode() == ISD::VECTOR_SHUFFLE) { - const 
ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(this); + if (const MachineSDNode *MN = dyn_cast<MachineSDNode>(this)) { + if (!MN->memoperands_empty()) { + OS << "<"; + OS << "Mem:"; + for (MachineSDNode::mmo_iterator i = MN->memoperands_begin(), + e = MN->memoperands_end(); i != e; ++i) { + OS << **i; + if (next(i) != e) + OS << " "; + } + OS << ">"; + } + } else if (const ShuffleVectorSDNode *SVN = + dyn_cast<ShuffleVectorSDNode>(this)) { OS << "<"; for (unsigned i = 0, e = ValueList[0].getVectorNumElements(); i != e; ++i) { int Idx = SVN->getMaskElt(i); @@ -5481,9 +5681,7 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { OS << Idx; } OS << ">"; - } - - if (const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(this)) { + } else if (const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(this)) { OS << '<' << CSDN->getAPIntValue() << '>'; } else if (const ConstantFPSDNode *CSDN = dyn_cast<ConstantFPSDNode>(this)) { if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEsingle) @@ -5505,13 +5703,13 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { OS << " + " << offset; else OS << " " << offset; - if (unsigned char TF = GADN->getTargetFlags()) + if (unsigned int TF = GADN->getTargetFlags()) OS << " [TF=" << TF << ']'; } else if (const FrameIndexSDNode *FIDN = dyn_cast<FrameIndexSDNode>(this)) { OS << "<" << FIDN->getIndex() << ">"; } else if (const JumpTableSDNode *JTDN = dyn_cast<JumpTableSDNode>(this)) { OS << "<" << JTDN->getIndex() << ">"; - if (unsigned char TF = JTDN->getTargetFlags()) + if (unsigned int TF = JTDN->getTargetFlags()) OS << " [TF=" << TF << ']'; } else if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(this)){ int offset = CP->getOffset(); @@ -5523,7 +5721,7 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { OS << " + " << offset; else OS << " " << offset; - if (unsigned char TF = CP->getTargetFlags()) + if (unsigned int TF = CP->getTargetFlags()) 
OS << " [TF=" << TF << ']'; } else if (const BasicBlockSDNode *BBDN = dyn_cast<BasicBlockSDNode>(this)) { OS << "<"; @@ -5541,80 +5739,47 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { } else if (const ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(this)) { OS << "'" << ES->getSymbol() << "'"; - if (unsigned char TF = ES->getTargetFlags()) + if (unsigned int TF = ES->getTargetFlags()) OS << " [TF=" << TF << ']'; } else if (const SrcValueSDNode *M = dyn_cast<SrcValueSDNode>(this)) { if (M->getValue()) OS << "<" << M->getValue() << ">"; else OS << "<null>"; - } else if (const MemOperandSDNode *M = dyn_cast<MemOperandSDNode>(this)) { - if (M->MO.getValue()) - OS << "<" << M->MO.getValue() << ":" << M->MO.getOffset() << ">"; - else - OS << "<null:" << M->MO.getOffset() << ">"; - } else if (const ARG_FLAGSSDNode *N = dyn_cast<ARG_FLAGSSDNode>(this)) { - OS << N->getArgFlags().getArgFlagsString(); } else if (const VTSDNode *N = dyn_cast<VTSDNode>(this)) { - OS << ":" << N->getVT().getMVTString(); + OS << ":" << N->getVT().getEVTString(); } else if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(this)) { - const Value *SrcValue = LD->getSrcValue(); - int SrcOffset = LD->getSrcValueOffset(); - OS << " <"; - if (SrcValue) - OS << SrcValue; - else - OS << "null"; - OS << ":" << SrcOffset << ">"; + OS << " <" << *LD->getMemOperand(); bool doExt = true; switch (LD->getExtensionType()) { default: doExt = false; break; - case ISD::EXTLOAD: OS << " <anyext "; break; - case ISD::SEXTLOAD: OS << " <sext "; break; - case ISD::ZEXTLOAD: OS << " <zext "; break; + case ISD::EXTLOAD: OS << ", anyext"; break; + case ISD::SEXTLOAD: OS << ", sext"; break; + case ISD::ZEXTLOAD: OS << ", zext"; break; } if (doExt) - OS << LD->getMemoryVT().getMVTString() << ">"; + OS << " from " << LD->getMemoryVT().getEVTString(); const char *AM = getIndexedModeName(LD->getAddressingMode()); if (*AM) - OS << " " << AM; - if (LD->isVolatile()) - OS << " <volatile>"; - 
OS << " alignment=" << LD->getAlignment(); + OS << ", " << AM; + + OS << ">"; } else if (const StoreSDNode *ST = dyn_cast<StoreSDNode>(this)) { - const Value *SrcValue = ST->getSrcValue(); - int SrcOffset = ST->getSrcValueOffset(); - OS << " <"; - if (SrcValue) - OS << SrcValue; - else - OS << "null"; - OS << ":" << SrcOffset << ">"; + OS << " <" << *ST->getMemOperand(); if (ST->isTruncatingStore()) - OS << " <trunc " << ST->getMemoryVT().getMVTString() << ">"; + OS << ", trunc to " << ST->getMemoryVT().getEVTString(); const char *AM = getIndexedModeName(ST->getAddressingMode()); if (*AM) - OS << " " << AM; - if (ST->isVolatile()) - OS << " <volatile>"; - OS << " alignment=" << ST->getAlignment(); - } else if (const AtomicSDNode* AT = dyn_cast<AtomicSDNode>(this)) { - const Value *SrcValue = AT->getSrcValue(); - int SrcOffset = AT->getSrcValueOffset(); - OS << " <"; - if (SrcValue) - OS << SrcValue; - else - OS << "null"; - OS << ":" << SrcOffset << ">"; - if (AT->isVolatile()) - OS << " <volatile>"; - OS << " alignment=" << AT->getAlignment(); + OS << ", " << AM; + + OS << ">"; + } else if (const MemSDNode* M = dyn_cast<MemSDNode>(this)) { + OS << " <" << *M->getMemOperand() << ">"; } } @@ -5635,16 +5800,17 @@ static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) { if (N->getOperand(i).getNode()->hasOneUse()) DumpNodes(N->getOperand(i).getNode(), indent+2, G); else - cerr << "\n" << std::string(indent+2, ' ') - << (void*)N->getOperand(i).getNode() << ": <multiple use>"; + errs() << "\n" << std::string(indent+2, ' ') + << (void*)N->getOperand(i).getNode() << ": <multiple use>"; - cerr << "\n" << std::string(indent, ' '); + errs() << "\n"; + errs().indent(indent); N->dump(G); } void SelectionDAG::dump() const { - cerr << "SelectionDAG has " << AllNodes.size() << " nodes:"; + errs() << "SelectionDAG has " << AllNodes.size() << " nodes:"; for (allnodes_const_iterator I = allnodes_begin(), E = allnodes_end(); I != E; ++I) { @@ -5655,7 +5821,7 
@@ void SelectionDAG::dump() const { if (getRoot().getNode()) DumpNodes(getRoot().getNode(), 2, this); - cerr << "\n\n"; + errs() << "\n\n"; } void SDNode::printr(raw_ostream &OS, const SelectionDAG *G) const { @@ -5699,6 +5865,11 @@ void SDNode::dumpr() const { DumpNodesr(errs(), this, 0, 0, once); } +void SDNode::dumpr(const SelectionDAG *G) const { + VisitedSDNodeSet once; + DumpNodesr(errs(), this, 0, G, once); +} + // getAddressSpace - Return the address space this GlobalAddress belongs to. unsigned GlobalAddressSDNode::getAddressSpace() const { @@ -5717,7 +5888,7 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits) { - MVT VT = getValueType(0); + EVT VT = getValueType(0); assert(VT.isVector() && "Expected a vector type"); unsigned sz = VT.getSizeInBits(); if (MinSplatBits > sz) @@ -5767,7 +5938,7 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue, SplatValue = HighValue | LowValue; SplatUndef = HighUndef & LowUndef; - + sz = HalfSize; } @@ -5775,14 +5946,14 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue, return true; } -bool ShuffleVectorSDNode::isSplatMask(const int *Mask, MVT VT) { +bool ShuffleVectorSDNode::isSplatMask(const int *Mask, EVT VT) { // Find the first non-undef value in the shuffle mask. unsigned i, e; for (i = 0, e = VT.getVectorNumElements(); i != e && Mask[i] < 0; ++i) /* search */; assert(i != e && "VECTOR_SHUFFLE node with all undef indices!"); - + // Make sure all remaining elements are either undef or the same as the first // non-undef value. 
for (int Idx = Mask[i]; i != e; ++i) diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp index 260911e..9017e43 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Constants.h" +#include "llvm/Constants.h" #include "llvm/CallingConv.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" @@ -49,6 +50,7 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include <algorithm> @@ -104,14 +106,14 @@ static unsigned ComputeLinearIndex(const TargetLowering &TLI, const Type *Ty, } /// ComputeValueVTs - Given an LLVM IR type, compute a sequence of -/// MVTs that represent all the individual underlying +/// EVTs that represent all the individual underlying /// non-aggregate types that comprise it. /// /// If Offsets is non-null, it points to a vector to be filled in /// with the in-memory offsets of each of the individual values. /// static void ComputeValueVTs(const TargetLowering &TLI, const Type *Ty, - SmallVectorImpl<MVT> &ValueVTs, + SmallVectorImpl<EVT> &ValueVTs, SmallVectorImpl<uint64_t> *Offsets = 0, uint64_t StartingOffset = 0) { // Given a struct type, recursively traverse the elements. @@ -135,9 +137,9 @@ static void ComputeValueVTs(const TargetLowering &TLI, const Type *Ty, return; } // Interpret void as zero return values. - if (Ty == Type::VoidTy) + if (Ty == Type::getVoidTy(Ty->getContext())) return; - // Base case: we can get an MVT for this LLVM IR type. + // Base case: we can get an EVT for this LLVM IR type. 
ValueVTs.push_back(TLI.getValueType(Ty)); if (Offsets) Offsets->push_back(StartingOffset); @@ -161,7 +163,7 @@ namespace llvm { /// ValueVTs - The value types of the values, which may not be legal, and /// may need be promoted or synthesized from one or more registers. /// - SmallVector<MVT, 4> ValueVTs; + SmallVector<EVT, 4> ValueVTs; /// RegVTs - The value types of the registers. This is the same size as /// ValueVTs and it records, for each value, what the type of the assigned @@ -172,7 +174,7 @@ namespace llvm { /// getRegisterType member function, however when with physical registers /// it is necessary to have a separate record of the types. /// - SmallVector<MVT, 4> RegVTs; + SmallVector<EVT, 4> RegVTs; /// Regs - This list holds the registers assigned to the values. /// Each legal or promoted value requires one register, and each @@ -184,21 +186,21 @@ namespace llvm { RegsForValue(const TargetLowering &tli, const SmallVector<unsigned, 4> ®s, - MVT regvt, MVT valuevt) + EVT regvt, EVT valuevt) : TLI(&tli), ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {} RegsForValue(const TargetLowering &tli, const SmallVector<unsigned, 4> ®s, - const SmallVector<MVT, 4> ®vts, - const SmallVector<MVT, 4> &valuevts) + const SmallVector<EVT, 4> ®vts, + const SmallVector<EVT, 4> &valuevts) : TLI(&tli), ValueVTs(valuevts), RegVTs(regvts), Regs(regs) {} - RegsForValue(const TargetLowering &tli, + RegsForValue(LLVMContext &Context, const TargetLowering &tli, unsigned Reg, const Type *Ty) : TLI(&tli) { ComputeValueVTs(tli, Ty, ValueVTs); for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) { - MVT ValueVT = ValueVTs[Value]; - unsigned NumRegs = TLI->getNumRegisters(ValueVT); - MVT RegisterVT = TLI->getRegisterType(ValueVT); + EVT ValueVT = ValueVTs[Value]; + unsigned NumRegs = TLI->getNumRegisters(Context, ValueVT); + EVT RegisterVT = TLI->getRegisterType(Context, ValueVT); for (unsigned i = 0; i != NumRegs; ++i) Regs.push_back(Reg + i); 
RegVTs.push_back(RegisterVT); @@ -352,11 +354,11 @@ void FunctionLoweringInfo::set(Function &fn, MachineFunction &mf, unsigned PHIReg = ValueMap[PN]; assert(PHIReg && "PHI node does not have an assigned virtual register!"); - SmallVector<MVT, 4> ValueVTs; + SmallVector<EVT, 4> ValueVTs; ComputeValueVTs(TLI, PN->getType(), ValueVTs); for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) { - MVT VT = ValueVTs[vti]; - unsigned NumRegisters = TLI.getNumRegisters(VT); + EVT VT = ValueVTs[vti]; + unsigned NumRegisters = TLI.getNumRegisters(*DAG.getContext(), VT); const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); for (unsigned i = 0; i != NumRegisters; ++i) BuildMI(MBB, DL, TII->get(TargetInstrInfo::PHI), PHIReg + i); @@ -366,7 +368,7 @@ void FunctionLoweringInfo::set(Function &fn, MachineFunction &mf, } } -unsigned FunctionLoweringInfo::MakeReg(MVT VT) { +unsigned FunctionLoweringInfo::MakeReg(EVT VT) { return RegInfo->createVirtualRegister(TLI.getRegClassFor(VT)); } @@ -378,15 +380,15 @@ unsigned FunctionLoweringInfo::MakeReg(MVT VT) { /// will assign registers for each member or element. /// unsigned FunctionLoweringInfo::CreateRegForValue(const Value *V) { - SmallVector<MVT, 4> ValueVTs; + SmallVector<EVT, 4> ValueVTs; ComputeValueVTs(TLI, V->getType(), ValueVTs); unsigned FirstReg = 0; for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) { - MVT ValueVT = ValueVTs[Value]; - MVT RegisterVT = TLI.getRegisterType(ValueVT); + EVT ValueVT = ValueVTs[Value]; + EVT RegisterVT = TLI.getRegisterType(V->getContext(), ValueVT); - unsigned NumRegs = TLI.getNumRegisters(ValueVT); + unsigned NumRegs = TLI.getNumRegisters(V->getContext(), ValueVT); for (unsigned i = 0; i != NumRegs; ++i) { unsigned R = MakeReg(RegisterVT); if (!FirstReg) FirstReg = R; @@ -402,7 +404,7 @@ unsigned FunctionLoweringInfo::CreateRegForValue(const Value *V) { /// (ISD::AssertSext). 
static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, const SDValue *Parts, - unsigned NumParts, MVT PartVT, MVT ValueVT, + unsigned NumParts, EVT PartVT, EVT ValueVT, ISD::NodeType AssertOp = ISD::DELETED_NODE) { assert(NumParts > 0 && "No parts to assemble!"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -418,11 +420,11 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, unsigned RoundParts = NumParts & (NumParts - 1) ? 1 << Log2_32(NumParts) : NumParts; unsigned RoundBits = PartBits * RoundParts; - MVT RoundVT = RoundBits == ValueBits ? - ValueVT : MVT::getIntegerVT(RoundBits); + EVT RoundVT = RoundBits == ValueBits ? + ValueVT : EVT::getIntegerVT(*DAG.getContext(), RoundBits); SDValue Lo, Hi; - MVT HalfVT = MVT::getIntegerVT(RoundBits/2); + EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), RoundBits/2); if (RoundParts > 2) { Lo = getCopyFromParts(DAG, dl, Parts, RoundParts/2, PartVT, HalfVT); @@ -439,7 +441,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, if (RoundParts < NumParts) { // Assemble the trailing non-power-of-2 part. unsigned OddParts = NumParts - RoundParts; - MVT OddVT = MVT::getIntegerVT(OddParts * PartBits); + EVT OddVT = EVT::getIntegerVT(*DAG.getContext(), OddParts * PartBits); Hi = getCopyFromParts(DAG, dl, Parts+RoundParts, OddParts, PartVT, OddVT); @@ -447,7 +449,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, Lo = Val; if (TLI.isBigEndian()) std::swap(Lo, Hi); - MVT TotalVT = MVT::getIntegerVT(NumParts * PartBits); + EVT TotalVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); Hi = DAG.getNode(ISD::ANY_EXTEND, dl, TotalVT, Hi); Hi = DAG.getNode(ISD::SHL, dl, TotalVT, Hi, DAG.getConstant(Lo.getValueType().getSizeInBits(), @@ -457,11 +459,11 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, } } else if (ValueVT.isVector()) { // Handle a multi-element vector. 
- MVT IntermediateVT, RegisterVT; + EVT IntermediateVT, RegisterVT; unsigned NumIntermediates; unsigned NumRegs = - TLI.getVectorTypeBreakdown(ValueVT, IntermediateVT, NumIntermediates, - RegisterVT); + TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT, + NumIntermediates, RegisterVT); assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!"); NumParts = NumRegs; // Silence a compiler warning. assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!"); @@ -494,11 +496,11 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, ValueVT, &Ops[0], NumIntermediates); } else if (PartVT.isFloatingPoint()) { // FP split into multiple FP parts (for ppcf128) - assert(ValueVT == MVT(MVT::ppcf128) && PartVT == MVT(MVT::f64) && + assert(ValueVT == EVT(MVT::ppcf128) && PartVT == EVT(MVT::f64) && "Unexpected split"); SDValue Lo, Hi; - Lo = DAG.getNode(ISD::BIT_CONVERT, dl, MVT(MVT::f64), Parts[0]); - Hi = DAG.getNode(ISD::BIT_CONVERT, dl, MVT(MVT::f64), Parts[1]); + Lo = DAG.getNode(ISD::BIT_CONVERT, dl, EVT(MVT::f64), Parts[0]); + Hi = DAG.getNode(ISD::BIT_CONVERT, dl, EVT(MVT::f64), Parts[1]); if (TLI.isBigEndian()) std::swap(Lo, Hi); Val = DAG.getNode(ISD::BUILD_PAIR, dl, ValueVT, Lo, Hi); @@ -506,7 +508,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, // FP split into integer parts (soft fp) assert(ValueVT.isFloatingPoint() && PartVT.isInteger() && !PartVT.isVector() && "Unexpected split"); - MVT IntVT = MVT::getIntegerVT(ValueVT.getSizeInBits()); + EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits()); Val = getCopyFromParts(DAG, dl, Parts, NumParts, PartVT, IntVT); } } @@ -555,7 +557,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) return DAG.getNode(ISD::BIT_CONVERT, dl, ValueVT, Val); - assert(0 && "Unknown mismatch!"); + llvm_unreachable("Unknown mismatch!"); return SDValue(); } @@ -563,11 
+565,11 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, /// split into legal parts. If the parts contain more bits than Val, then, for /// integers, ExtendKind can be used to specify how to generate the extra bits. static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl, SDValue Val, - SDValue *Parts, unsigned NumParts, MVT PartVT, + SDValue *Parts, unsigned NumParts, EVT PartVT, ISD::NodeType ExtendKind = ISD::ANY_EXTEND) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - MVT PtrVT = TLI.getPointerTy(); - MVT ValueVT = Val.getValueType(); + EVT PtrVT = TLI.getPointerTy(); + EVT ValueVT = Val.getValueType(); unsigned PartBits = PartVT.getSizeInBits(); unsigned OrigNumParts = NumParts; assert(TLI.isTypeLegal(PartVT) && "Copying to an illegal type!"); @@ -588,10 +590,10 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl, SDValue Val, assert(NumParts == 1 && "Do not know what to promote to!"); Val = DAG.getNode(ISD::FP_EXTEND, dl, PartVT, Val); } else if (PartVT.isInteger() && ValueVT.isInteger()) { - ValueVT = MVT::getIntegerVT(NumParts * PartBits); + ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); Val = DAG.getNode(ExtendKind, dl, ValueVT, Val); } else { - assert(0 && "Unknown mismatch!"); + llvm_unreachable("Unknown mismatch!"); } } else if (PartBits == ValueVT.getSizeInBits()) { // Different types of the same size. @@ -600,10 +602,10 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl, SDValue Val, } else if (NumParts * PartBits < ValueVT.getSizeInBits()) { // If the parts cover less bits than value has, truncate the value. 
if (PartVT.isInteger() && ValueVT.isInteger()) { - ValueVT = MVT::getIntegerVT(NumParts * PartBits); + ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); Val = DAG.getNode(ISD::TRUNCATE, dl, ValueVT, Val); } else { - assert(0 && "Unknown mismatch!"); + llvm_unreachable("Unknown mismatch!"); } } @@ -634,19 +636,19 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl, SDValue Val, // The odd parts were reversed by getCopyToParts - unreverse them. std::reverse(Parts + RoundParts, Parts + NumParts); NumParts = RoundParts; - ValueVT = MVT::getIntegerVT(NumParts * PartBits); + ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); Val = DAG.getNode(ISD::TRUNCATE, dl, ValueVT, Val); } // The number of parts is a power of 2. Repeatedly bisect the value using // EXTRACT_ELEMENT. Parts[0] = DAG.getNode(ISD::BIT_CONVERT, dl, - MVT::getIntegerVT(ValueVT.getSizeInBits()), + EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits()), Val); for (unsigned StepSize = NumParts; StepSize > 1; StepSize /= 2) { for (unsigned i = 0; i < NumParts; i += StepSize) { unsigned ThisBits = StepSize * PartBits / 2; - MVT ThisVT = MVT::getIntegerVT (ThisBits); + EVT ThisVT = EVT::getIntegerVT(*DAG.getContext(), ThisBits); SDValue &Part0 = Parts[i]; SDValue &Part1 = Parts[i+StepSize/2]; @@ -692,11 +694,10 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl, SDValue Val, } // Handle a multi-element vector. 
- MVT IntermediateVT, RegisterVT; + EVT IntermediateVT, RegisterVT; unsigned NumIntermediates; - unsigned NumRegs = TLI - .getVectorTypeBreakdown(ValueVT, IntermediateVT, NumIntermediates, - RegisterVT); + unsigned NumRegs = TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, + IntermediateVT, NumIntermediates, RegisterVT); unsigned NumElements = ValueVT.getVectorNumElements(); assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!"); @@ -750,8 +751,10 @@ void SelectionDAGLowering::clear() { NodeMap.clear(); PendingLoads.clear(); PendingExports.clear(); + EdgeMapping.clear(); DAG.clear(); CurDebugLoc = DebugLoc::getUnknownLoc(); + HasTailCall = false; } /// getRoot - Return the current virtual root of the Selection DAG, @@ -817,8 +820,7 @@ void SelectionDAGLowering::visit(unsigned Opcode, User &I) { // Note: this doesn't use InstVisitor, because it has to work with // ConstantExpr's in addition to instructions. switch (Opcode) { - default: assert(0 && "Unknown instruction type encountered!"); - abort(); + default: llvm_unreachable("Unknown instruction type encountered!"); // Build the switch statement using the Instruction.def file. #define HANDLE_INST(NUM, OPCODE, CLASS) \ case Instruction::OPCODE:return visit##OPCODE((CLASS&)I); @@ -831,7 +833,7 @@ SDValue SelectionDAGLowering::getValue(const Value *V) { if (N.getNode()) return N; if (Constant *C = const_cast<Constant*>(dyn_cast<Constant>(V))) { - MVT VT = TLI.getValueType(V->getType(), true); + EVT VT = TLI.getValueType(V->getType(), true); if (ConstantInt *CI = dyn_cast<ConstantInt>(C)) return N = DAG.getConstant(*CI, VT); @@ -860,6 +862,10 @@ SDValue SelectionDAGLowering::getValue(const Value *V) { for (User::const_op_iterator OI = C->op_begin(), OE = C->op_end(); OI != OE; ++OI) { SDNode *Val = getValue(*OI).getNode(); + // If the operand is an empty aggregate, there are no values. 
+ if (!Val) continue; + // Add each leaf value from the operand to the Constants list + // to form a flattened list of all the values. for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i) Constants.push_back(SDValue(Val, i)); } @@ -871,14 +877,14 @@ SDValue SelectionDAGLowering::getValue(const Value *V) { assert((isa<ConstantAggregateZero>(C) || isa<UndefValue>(C)) && "Unknown struct or array constant!"); - SmallVector<MVT, 4> ValueVTs; + SmallVector<EVT, 4> ValueVTs; ComputeValueVTs(TLI, C->getType(), ValueVTs); unsigned NumElts = ValueVTs.size(); if (NumElts == 0) return SDValue(); // empty struct SmallVector<SDValue, 4> Constants(NumElts); for (unsigned i = 0; i != NumElts; ++i) { - MVT EltVT = ValueVTs[i]; + EVT EltVT = ValueVTs[i]; if (isa<UndefValue>(C)) Constants[i] = DAG.getUNDEF(EltVT); else if (EltVT.isFloatingPoint()) @@ -900,7 +906,7 @@ SDValue SelectionDAGLowering::getValue(const Value *V) { Ops.push_back(getValue(CP->getOperand(i))); } else { assert(isa<ConstantAggregateZero>(C) && "Unknown vector constant!"); - MVT EltVT = TLI.getValueType(VecTy->getElementType()); + EVT EltVT = TLI.getValueType(VecTy->getElementType()); SDValue Op; if (EltVT.isFloatingPoint()) @@ -927,30 +933,24 @@ SDValue SelectionDAGLowering::getValue(const Value *V) { unsigned InReg = FuncInfo.ValueMap[V]; assert(InReg && "Value not in map!"); - RegsForValue RFV(TLI, InReg, V->getType()); + RegsForValue RFV(*DAG.getContext(), TLI, InReg, V->getType()); SDValue Chain = DAG.getEntryNode(); return RFV.getCopyFromRegs(DAG, getCurDebugLoc(), Chain, NULL); } void SelectionDAGLowering::visitRet(ReturnInst &I) { - if (I.getNumOperands() == 0) { - DAG.setRoot(DAG.getNode(ISD::RET, getCurDebugLoc(), - MVT::Other, getControlRoot())); - return; - } - - SmallVector<SDValue, 8> NewValues; - NewValues.push_back(getControlRoot()); + SDValue Chain = getControlRoot(); + SmallVector<ISD::OutputArg, 8> Outs; for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) { - SmallVector<MVT, 4> 
ValueVTs; + SmallVector<EVT, 4> ValueVTs; ComputeValueVTs(TLI, I.getOperand(i)->getType(), ValueVTs); unsigned NumValues = ValueVTs.size(); if (NumValues == 0) continue; SDValue RetOp = getValue(I.getOperand(i)); for (unsigned j = 0, f = NumValues; j != f; ++j) { - MVT VT = ValueVTs[j]; + EVT VT = ValueVTs[j]; ISD::NodeType ExtendKind = ISD::ANY_EXTEND; @@ -965,13 +965,13 @@ void SelectionDAGLowering::visitRet(ReturnInst &I) { // conventions. The frontend should mark functions whose return values // require promoting with signext or zeroext attributes. if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) { - MVT MinVT = TLI.getRegisterType(MVT::i32); + EVT MinVT = TLI.getRegisterType(*DAG.getContext(), MVT::i32); if (VT.bitsLT(MinVT)) VT = MinVT; } - unsigned NumParts = TLI.getNumRegisters(VT); - MVT PartVT = TLI.getRegisterType(VT); + unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), VT); + EVT PartVT = TLI.getRegisterType(*DAG.getContext(), VT); SmallVector<SDValue, 4> Parts(NumParts); getCopyToParts(DAG, getCurDebugLoc(), SDValue(RetOp.getNode(), RetOp.getResNo() + j), @@ -981,14 +981,30 @@ void SelectionDAGLowering::visitRet(ReturnInst &I) { ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); if (F->paramHasAttr(0, Attribute::InReg)) Flags.setInReg(); - for (unsigned i = 0; i < NumParts; ++i) { - NewValues.push_back(Parts[i]); - NewValues.push_back(DAG.getArgFlags(Flags)); - } + + // Propagate extension type if any + if (F->paramHasAttr(0, Attribute::SExt)) + Flags.setSExt(); + else if (F->paramHasAttr(0, Attribute::ZExt)) + Flags.setZExt(); + + for (unsigned i = 0; i < NumParts; ++i) + Outs.push_back(ISD::OutputArg(Flags, Parts[i], /*isfixed=*/true)); } } - DAG.setRoot(DAG.getNode(ISD::RET, getCurDebugLoc(), MVT::Other, - &NewValues[0], NewValues.size())); + + bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg(); + CallingConv::ID CallConv = + DAG.getMachineFunction().getFunction()->getCallingConv(); + Chain = TLI.LowerReturn(Chain, CallConv, 
isVarArg, + Outs, getCurDebugLoc(), DAG); + + // Verify that the target's LowerReturn behaved as expected. + assert(Chain.getNode() && Chain.getValueType() == MVT::Other && + "LowerReturn didn't return a valid chain!"); + + // Update the DAG with the new chain value resulting from return lowering. + DAG.setRoot(Chain); } /// CopyToExportRegsIfNeeded - If the given value has virtual registers @@ -1073,7 +1089,7 @@ static ISD::CondCode getFCmpCondCode(FCmpInst::Predicate Pred) { case FCmpInst::FCMP_UNE: FOC = ISD::SETNE; FPC = ISD::SETUNE; break; case FCmpInst::FCMP_TRUE: FOC = FPC = ISD::SETTRUE; break; default: - assert(0 && "Invalid FCmp predicate opcode!"); + llvm_unreachable("Invalid FCmp predicate opcode!"); FOC = FPC = ISD::SETFALSE; break; } @@ -1099,7 +1115,7 @@ static ISD::CondCode getICmpCondCode(ICmpInst::Predicate Pred) { case ICmpInst::ICMP_SGT: return ISD::SETGT; case ICmpInst::ICMP_UGT: return ISD::SETUGT; default: - assert(0 && "Invalid ICmp predicate opcode!"); + llvm_unreachable("Invalid ICmp predicate opcode!"); return ISD::SETNE; } } @@ -1131,7 +1147,7 @@ SelectionDAGLowering::EmitBranchForMergedCondition(Value *Cond, Condition = getFCmpCondCode(FC->getPredicate()); } else { Condition = ISD::SETEQ; // silence warning. - assert(0 && "Unknown compare instruction"); + llvm_unreachable("Unknown compare instruction"); } CaseBlock CB(Condition, BOp->getOperand(0), @@ -1142,7 +1158,7 @@ SelectionDAGLowering::EmitBranchForMergedCondition(Value *Cond, } // Create a CaseBlock record representing this branch. - CaseBlock CB(ISD::SETEQ, Cond, ConstantInt::getTrue(), + CaseBlock CB(ISD::SETEQ, Cond, ConstantInt::getTrue(*DAG.getContext()), NULL, TBB, FBB, CurBB); SwitchCases.push_back(CB); } @@ -1229,7 +1245,7 @@ void SelectionDAGLowering::visitBr(BranchInst &I) { // Figure out which block is immediately after the current one. 
MachineBasicBlock *NextBlock = 0; MachineFunction::iterator BBI = CurMBB; - if (++BBI != CurMBB->getParent()->end()) + if (++BBI != FuncInfo.MF->end()) NextBlock = BBI; if (I.isUnconditional()) { @@ -1290,14 +1306,14 @@ void SelectionDAGLowering::visitBr(BranchInst &I) { // Okay, we decided not to do this, remove any inserted MBB's and clear // SwitchCases. for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i) - CurMBB->getParent()->erase(SwitchCases[i].ThisBB); + FuncInfo.MF->erase(SwitchCases[i].ThisBB); SwitchCases.clear(); } } // Create a CaseBlock record representing this branch. - CaseBlock CB(ISD::SETEQ, CondVal, ConstantInt::getTrue(), + CaseBlock CB(ISD::SETEQ, CondVal, ConstantInt::getTrue(*DAG.getContext()), NULL, Succ0MBB, Succ1MBB, CurMBB); // Use visitSwitchCase to actually insert the fast branch sequence for this // cond branch. @@ -1315,9 +1331,11 @@ void SelectionDAGLowering::visitSwitchCase(CaseBlock &CB) { if (CB.CmpMHS == NULL) { // Fold "(X == true)" to X and "(X == false)" to !X to // handle common cases produced by branch lowering. 
- if (CB.CmpRHS == ConstantInt::getTrue() && CB.CC == ISD::SETEQ) + if (CB.CmpRHS == ConstantInt::getTrue(*DAG.getContext()) && + CB.CC == ISD::SETEQ) Cond = CondLHS; - else if (CB.CmpRHS == ConstantInt::getFalse() && CB.CC == ISD::SETEQ) { + else if (CB.CmpRHS == ConstantInt::getFalse(*DAG.getContext()) && + CB.CC == ISD::SETEQ) { SDValue True = DAG.getConstant(1, CondLHS.getValueType()); Cond = DAG.getNode(ISD::XOR, dl, CondLHS.getValueType(), CondLHS, True); } else @@ -1329,7 +1347,7 @@ void SelectionDAGLowering::visitSwitchCase(CaseBlock &CB) { const APInt& High = cast<ConstantInt>(CB.CmpRHS)->getValue(); SDValue CmpOp = getValue(CB.CmpMHS); - MVT VT = CmpOp.getValueType(); + EVT VT = CmpOp.getValueType(); if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(true)) { Cond = DAG.getSetCC(dl, MVT::i1, CmpOp, DAG.getConstant(High, VT), @@ -1350,7 +1368,7 @@ void SelectionDAGLowering::visitSwitchCase(CaseBlock &CB) { // This is used to avoid emitting unnecessary branches to the next block. MachineBasicBlock *NextBlock = 0; MachineFunction::iterator BBI = CurMBB; - if (++BBI != CurMBB->getParent()->end()) + if (++BBI != FuncInfo.MF->end()) NextBlock = BBI; // If the lhs block is the next block, invert the condition so that we can @@ -1385,7 +1403,7 @@ void SelectionDAGLowering::visitSwitchCase(CaseBlock &CB) { void SelectionDAGLowering::visitJumpTable(JumpTable &JT) { // Emit the code for the jump table assert(JT.Reg != -1U && "Should lower JT Header first!"); - MVT PTy = TLI.getPointerTy(); + EVT PTy = TLI.getPointerTy(); SDValue Index = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(), JT.Reg, PTy); SDValue Table = DAG.getJumpTable(JT.JTI, PTy); @@ -1402,7 +1420,7 @@ void SelectionDAGLowering::visitJumpTableHeader(JumpTable &JT, // conditional branch to default mbb if the result is greater than the // difference between smallest and largest cases. 
SDValue SwitchOp = getValue(JTH.SValue); - MVT VT = SwitchOp.getValueType(); + EVT VT = SwitchOp.getValueType(); SDValue SUB = DAG.getNode(ISD::SUB, getCurDebugLoc(), VT, SwitchOp, DAG.getConstant(JTH.First, VT)); @@ -1411,12 +1429,7 @@ void SelectionDAGLowering::visitJumpTableHeader(JumpTable &JT, // can be used as an index into the jump table in a subsequent basic block. // This value may be smaller or larger than the target's pointer type, and // therefore require extension or truncating. - if (VT.bitsGT(TLI.getPointerTy())) - SwitchOp = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), - TLI.getPointerTy(), SUB); - else - SwitchOp = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), - TLI.getPointerTy(), SUB); + SwitchOp = DAG.getZExtOrTrunc(SUB, getCurDebugLoc(), TLI.getPointerTy()); unsigned JumpTableReg = FuncInfo.MakeReg(TLI.getPointerTy()); SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(), @@ -1435,7 +1448,7 @@ void SelectionDAGLowering::visitJumpTableHeader(JumpTable &JT, // This is used to avoid emitting unnecessary branches to the next block. 
MachineBasicBlock *NextBlock = 0; MachineFunction::iterator BBI = CurMBB; - if (++BBI != CurMBB->getParent()->end()) + if (++BBI != FuncInfo.MF->end()) NextBlock = BBI; SDValue BrCond = DAG.getNode(ISD::BRCOND, getCurDebugLoc(), @@ -1454,7 +1467,7 @@ void SelectionDAGLowering::visitJumpTableHeader(JumpTable &JT, void SelectionDAGLowering::visitBitTestHeader(BitTestBlock &B) { // Subtract the minimum value SDValue SwitchOp = getValue(B.SValue); - MVT VT = SwitchOp.getValueType(); + EVT VT = SwitchOp.getValueType(); SDValue SUB = DAG.getNode(ISD::SUB, getCurDebugLoc(), VT, SwitchOp, DAG.getConstant(B.First, VT)); @@ -1464,13 +1477,7 @@ void SelectionDAGLowering::visitBitTestHeader(BitTestBlock &B) { SUB, DAG.getConstant(B.Range, VT), ISD::SETUGT); - SDValue ShiftOp; - if (VT.bitsGT(TLI.getPointerTy())) - ShiftOp = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), - TLI.getPointerTy(), SUB); - else - ShiftOp = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), - TLI.getPointerTy(), SUB); + SDValue ShiftOp = DAG.getZExtOrTrunc(SUB, getCurDebugLoc(), TLI.getPointerTy()); B.Reg = FuncInfo.MakeReg(TLI.getPointerTy()); SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(), @@ -1480,7 +1487,7 @@ void SelectionDAGLowering::visitBitTestHeader(BitTestBlock &B) { // This is used to avoid emitting unnecessary branches to the next block. MachineBasicBlock *NextBlock = 0; MachineFunction::iterator BBI = CurMBB; - if (++BBI != CurMBB->getParent()->end()) + if (++BBI != FuncInfo.MF->end()) NextBlock = BBI; MachineBasicBlock* MBB = B.Cases[0].ThisBB; @@ -1531,7 +1538,7 @@ void SelectionDAGLowering::visitBitTestCase(MachineBasicBlock* NextMBB, // This is used to avoid emitting unnecessary branches to the next block. 
MachineBasicBlock *NextBlock = 0; MachineFunction::iterator BBI = CurMBB; - if (++BBI != CurMBB->getParent()->end()) + if (++BBI != FuncInfo.MF->end()) NextBlock = BBI; if (NextMBB == NextBlock) @@ -1584,13 +1591,13 @@ bool SelectionDAGLowering::handleSmallSwitchRange(CaseRec& CR, // Get the MachineFunction which holds the current MBB. This is used when // inserting any additional MBBs necessary to represent the switch. - MachineFunction *CurMF = CurMBB->getParent(); + MachineFunction *CurMF = FuncInfo.MF; // Figure out which block is immediately after the current one. MachineBasicBlock *NextBlock = 0; MachineFunction::iterator BBI = CR.CaseBB; - if (++BBI != CurMBB->getParent()->end()) + if (++BBI != FuncInfo.MF->end()) NextBlock = BBI; // TODO: If any two of the cases has the same destination, and if one value @@ -1698,14 +1705,11 @@ bool SelectionDAGLowering::handleJTSwitchCase(CaseRec& CR, // Get the MachineFunction which holds the current MBB. This is used when // inserting any additional MBBs necessary to represent the switch. - MachineFunction *CurMF = CurMBB->getParent(); + MachineFunction *CurMF = FuncInfo.MF; // Figure out which block is immediately after the current one. - MachineBasicBlock *NextBlock = 0; MachineFunction::iterator BBI = CR.CaseBB; - - if (++BBI != CurMBB->getParent()->end()) - NextBlock = BBI; + ++BBI; const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock(); @@ -1771,14 +1775,11 @@ bool SelectionDAGLowering::handleBTSplitSwitchCase(CaseRec& CR, MachineBasicBlock* Default) { // Get the MachineFunction which holds the current MBB. This is used when // inserting any additional MBBs necessary to represent the switch. - MachineFunction *CurMF = CurMBB->getParent(); + MachineFunction *CurMF = FuncInfo.MF; // Figure out which block is immediately after the current one. 
- MachineBasicBlock *NextBlock = 0; MachineFunction::iterator BBI = CR.CaseBB; - - if (++BBI != CurMBB->getParent()->end()) - NextBlock = BBI; + ++BBI; Case& FrontCase = *CR.Range.first; Case& BackCase = *(CR.Range.second-1); @@ -1898,14 +1899,15 @@ bool SelectionDAGLowering::handleBitTestsSwitchCase(CaseRec& CR, CaseRecVector& WorkList, Value* SV, MachineBasicBlock* Default){ - unsigned IntPtrBits = TLI.getPointerTy().getSizeInBits(); + EVT PTy = TLI.getPointerTy(); + unsigned IntPtrBits = PTy.getSizeInBits(); Case& FrontCase = *CR.Range.first; Case& BackCase = *(CR.Range.second-1); // Get the MachineFunction which holds the current MBB. This is used when // inserting any additional MBBs necessary to represent the switch. - MachineFunction *CurMF = CurMBB->getParent(); + MachineFunction *CurMF = FuncInfo.MF; // If target does not have legal shift left, do not emit bit tests at all. if (!TLI.isOperationLegal(ISD::SHL, TLI.getPointerTy())) @@ -2069,7 +2071,6 @@ size_t SelectionDAGLowering::Clusterify(CaseVector& Cases, void SelectionDAGLowering::visitSwitch(SwitchInst &SI) { // Figure out which block is immediately after the current one. MachineBasicBlock *NextBlock = 0; - MachineFunction::iterator BBI = CurMBB; MachineBasicBlock *Default = FuncInfo.MBBMap[SI.getDefaultDest()]; @@ -2174,24 +2175,26 @@ void SelectionDAGLowering::visitShift(User &I, unsigned Opcode) { if (!isa<VectorType>(I.getType()) && Op2.getValueType() != TLI.getShiftAmountTy()) { // If the operand is smaller than the shift count type, promote it. - if (TLI.getShiftAmountTy().bitsGT(Op2.getValueType())) + EVT PTy = TLI.getPointerTy(); + EVT STy = TLI.getShiftAmountTy(); + if (STy.bitsGT(Op2.getValueType())) Op2 = DAG.getNode(ISD::ANY_EXTEND, getCurDebugLoc(), TLI.getShiftAmountTy(), Op2); // If the operand is larger than the shift count type but the shift // count type has enough bits to represent any shift value, truncate // it now. 
This is a common case and it exposes the truncate to // optimization early. - else if (TLI.getShiftAmountTy().getSizeInBits() >= + else if (STy.getSizeInBits() >= Log2_32_Ceil(Op2.getValueType().getSizeInBits())) Op2 = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), TLI.getShiftAmountTy(), Op2); // Otherwise we'll need to temporarily settle for some other // convenient type; type legalization will make adjustments as // needed. - else if (TLI.getPointerTy().bitsLT(Op2.getValueType())) + else if (PTy.bitsLT(Op2.getValueType())) Op2 = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), TLI.getPointerTy(), Op2); - else if (TLI.getPointerTy().bitsGT(Op2.getValueType())) + else if (PTy.bitsGT(Op2.getValueType())) Op2 = DAG.getNode(ISD::ANY_EXTEND, getCurDebugLoc(), TLI.getPointerTy(), Op2); } @@ -2209,7 +2212,9 @@ void SelectionDAGLowering::visitICmp(User &I) { SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); ISD::CondCode Opcode = getICmpCondCode(predicate); - setValue(&I, DAG.getSetCC(getCurDebugLoc(),MVT::i1, Op1, Op2, Opcode)); + + EVT DestVT = TLI.getValueType(I.getType()); + setValue(&I, DAG.getSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Opcode)); } void SelectionDAGLowering::visitFCmp(User &I) { @@ -2221,38 +2226,12 @@ void SelectionDAGLowering::visitFCmp(User &I) { SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); ISD::CondCode Condition = getFCmpCondCode(predicate); - setValue(&I, DAG.getSetCC(getCurDebugLoc(), MVT::i1, Op1, Op2, Condition)); -} - -void SelectionDAGLowering::visitVICmp(User &I) { - ICmpInst::Predicate predicate = ICmpInst::BAD_ICMP_PREDICATE; - if (VICmpInst *IC = dyn_cast<VICmpInst>(&I)) - predicate = IC->getPredicate(); - else if (ConstantExpr *IC = dyn_cast<ConstantExpr>(&I)) - predicate = ICmpInst::Predicate(IC->getPredicate()); - SDValue Op1 = getValue(I.getOperand(0)); - SDValue Op2 = getValue(I.getOperand(1)); - ISD::CondCode Opcode = getICmpCondCode(predicate); - setValue(&I, 
DAG.getVSetCC(getCurDebugLoc(), Op1.getValueType(), - Op1, Op2, Opcode)); -} - -void SelectionDAGLowering::visitVFCmp(User &I) { - FCmpInst::Predicate predicate = FCmpInst::BAD_FCMP_PREDICATE; - if (VFCmpInst *FC = dyn_cast<VFCmpInst>(&I)) - predicate = FC->getPredicate(); - else if (ConstantExpr *FC = dyn_cast<ConstantExpr>(&I)) - predicate = FCmpInst::Predicate(FC->getPredicate()); - SDValue Op1 = getValue(I.getOperand(0)); - SDValue Op2 = getValue(I.getOperand(1)); - ISD::CondCode Condition = getFCmpCondCode(predicate); - MVT DestVT = TLI.getValueType(I.getType()); - - setValue(&I, DAG.getVSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Condition)); + EVT DestVT = TLI.getValueType(I.getType()); + setValue(&I, DAG.getSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Condition)); } void SelectionDAGLowering::visitSelect(User &I) { - SmallVector<MVT, 4> ValueVTs; + SmallVector<EVT, 4> ValueVTs; ComputeValueVTs(TLI, I.getType(), ValueVTs); unsigned NumValues = ValueVTs.size(); if (NumValues != 0) { @@ -2277,7 +2256,7 @@ void SelectionDAGLowering::visitSelect(User &I) { void SelectionDAGLowering::visitTrunc(User &I) { // TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest). SDValue N = getValue(I.getOperand(0)); - MVT DestVT = TLI.getValueType(I.getType()); + EVT DestVT = TLI.getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), DestVT, N)); } @@ -2285,7 +2264,7 @@ void SelectionDAGLowering::visitZExt(User &I) { // ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest). // ZExt also can't be a cast to bool for same reason. So, nothing much to do SDValue N = getValue(I.getOperand(0)); - MVT DestVT = TLI.getValueType(I.getType()); + EVT DestVT = TLI.getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), DestVT, N)); } @@ -2293,14 +2272,14 @@ void SelectionDAGLowering::visitSExt(User &I) { // SExt cannot be a no-op cast because sizeof(src) < sizeof(dest). 
// SExt also can't be a cast to bool for same reason. So, nothing much to do SDValue N = getValue(I.getOperand(0)); - MVT DestVT = TLI.getValueType(I.getType()); + EVT DestVT = TLI.getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurDebugLoc(), DestVT, N)); } void SelectionDAGLowering::visitFPTrunc(User &I) { // FPTrunc is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); - MVT DestVT = TLI.getValueType(I.getType()); + EVT DestVT = TLI.getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::FP_ROUND, getCurDebugLoc(), DestVT, N, DAG.getIntPtrConstant(0))); } @@ -2308,35 +2287,35 @@ void SelectionDAGLowering::visitFPTrunc(User &I) { void SelectionDAGLowering::visitFPExt(User &I){ // FPTrunc is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); - MVT DestVT = TLI.getValueType(I.getType()); + EVT DestVT = TLI.getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurDebugLoc(), DestVT, N)); } void SelectionDAGLowering::visitFPToUI(User &I) { // FPToUI is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); - MVT DestVT = TLI.getValueType(I.getType()); + EVT DestVT = TLI.getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::FP_TO_UINT, getCurDebugLoc(), DestVT, N)); } void SelectionDAGLowering::visitFPToSI(User &I) { // FPToSI is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); - MVT DestVT = TLI.getValueType(I.getType()); + EVT DestVT = TLI.getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::FP_TO_SINT, getCurDebugLoc(), DestVT, N)); } void SelectionDAGLowering::visitUIToFP(User &I) { // UIToFP is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); - MVT DestVT = TLI.getValueType(I.getType()); + EVT DestVT = TLI.getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurDebugLoc(), DestVT, N)); } void SelectionDAGLowering::visitSIToFP(User &I){ // SIToFP is never a no-op cast, no 
need to check SDValue N = getValue(I.getOperand(0)); - MVT DestVT = TLI.getValueType(I.getType()); + EVT DestVT = TLI.getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurDebugLoc(), DestVT, N)); } @@ -2344,14 +2323,9 @@ void SelectionDAGLowering::visitPtrToInt(User &I) { // What to do depends on the size of the integer and the size of the pointer. // We can either truncate, zero extend, or no-op, accordingly. SDValue N = getValue(I.getOperand(0)); - MVT SrcVT = N.getValueType(); - MVT DestVT = TLI.getValueType(I.getType()); - SDValue Result; - if (DestVT.bitsLT(SrcVT)) - Result = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), DestVT, N); - else - // Note: ZERO_EXTEND can handle cases where the sizes are equal too - Result = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), DestVT, N); + EVT SrcVT = N.getValueType(); + EVT DestVT = TLI.getValueType(I.getType()); + SDValue Result = DAG.getZExtOrTrunc(N, getCurDebugLoc(), DestVT); setValue(&I, Result); } @@ -2359,19 +2333,14 @@ void SelectionDAGLowering::visitIntToPtr(User &I) { // What to do depends on the size of the integer and the size of the pointer. // We can either truncate, zero extend, or no-op, accordingly. 
SDValue N = getValue(I.getOperand(0)); - MVT SrcVT = N.getValueType(); - MVT DestVT = TLI.getValueType(I.getType()); - if (DestVT.bitsLT(SrcVT)) - setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), DestVT, N)); - else - // Note: ZERO_EXTEND can handle cases where the sizes are equal too - setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), - DestVT, N)); + EVT SrcVT = N.getValueType(); + EVT DestVT = TLI.getValueType(I.getType()); + setValue(&I, DAG.getZExtOrTrunc(N, getCurDebugLoc(), DestVT)); } void SelectionDAGLowering::visitBitCast(User &I) { SDValue N = getValue(I.getOperand(0)); - MVT DestVT = TLI.getValueType(I.getType()); + EVT DestVT = TLI.getValueType(I.getType()); // BitCast assures us that source and destination are the same size so this // is either a BIT_CONVERT or a no-op. @@ -2422,7 +2391,8 @@ void SelectionDAGLowering::visitShuffleVector(User &I) { // Convert the ConstantVector mask operand into an array of ints, with -1 // representing undef values. SmallVector<Constant*, 8> MaskElts; - cast<Constant>(I.getOperand(2))->getVectorElements(MaskElts); + cast<Constant>(I.getOperand(2))->getVectorElements(*DAG.getContext(), + MaskElts); unsigned MaskNumElts = MaskElts.size(); for (unsigned i = 0; i != MaskNumElts; ++i) { if (isa<UndefValue>(MaskElts[i])) @@ -2431,8 +2401,8 @@ void SelectionDAGLowering::visitShuffleVector(User &I) { Mask.push_back(cast<ConstantInt>(MaskElts[i])->getSExtValue()); } - MVT VT = TLI.getValueType(I.getType()); - MVT SrcVT = Src1.getValueType(); + EVT VT = TLI.getValueType(I.getType()); + EVT SrcVT = Src1.getValueType(); unsigned SrcNumElts = SrcVT.getVectorNumElements(); if (SrcNumElts == MaskNumElts) { @@ -2531,7 +2501,7 @@ void SelectionDAGLowering::visitShuffleVector(User &I) { } } - if (RangeUse[0] == 0 && RangeUse[0] == 0) { + if (RangeUse[0] == 0 && RangeUse[1] == 0) { setValue(&I, DAG.getUNDEF(VT)); // Vectors are not used. 
return; } @@ -2566,8 +2536,8 @@ void SelectionDAGLowering::visitShuffleVector(User &I) { // We can't use either concat vectors or extract subvectors so fall back to // replacing the shuffle with extract and build vector. // to insert and build vector. - MVT EltVT = VT.getVectorElementType(); - MVT PtrVT = TLI.getPointerTy(); + EVT EltVT = VT.getVectorElementType(); + EVT PtrVT = TLI.getPointerTy(); SmallVector<SDValue,8> Ops; for (unsigned i = 0; i != MaskNumElts; ++i) { if (Mask[i] < 0) { @@ -2598,9 +2568,9 @@ void SelectionDAGLowering::visitInsertValue(InsertValueInst &I) { unsigned LinearIndex = ComputeLinearIndex(TLI, AggTy, I.idx_begin(), I.idx_end()); - SmallVector<MVT, 4> AggValueVTs; + SmallVector<EVT, 4> AggValueVTs; ComputeValueVTs(TLI, AggTy, AggValueVTs); - SmallVector<MVT, 4> ValValueVTs; + SmallVector<EVT, 4> ValValueVTs; ComputeValueVTs(TLI, ValTy, ValValueVTs); unsigned NumAggValues = AggValueVTs.size(); @@ -2637,7 +2607,7 @@ void SelectionDAGLowering::visitExtractValue(ExtractValueInst &I) { unsigned LinearIndex = ComputeLinearIndex(TLI, AggTy, I.idx_begin(), I.idx_end()); - SmallVector<MVT, 4> ValValueVTs; + SmallVector<EVT, 4> ValValueVTs; ComputeValueVTs(TLI, ValTy, ValValueVTs); unsigned NumValValues = ValValueVTs.size(); @@ -2682,7 +2652,8 @@ void SelectionDAGLowering::visitGetElementPtr(User &I) { uint64_t Offs = TD->getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue(); SDValue OffsVal; - unsigned PtrBits = TLI.getPointerTy().getSizeInBits(); + EVT PTy = TLI.getPointerTy(); + unsigned PtrBits = PTy.getSizeInBits(); if (PtrBits < 64) { OffsVal = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), TLI.getPointerTy(), @@ -2700,12 +2671,7 @@ void SelectionDAGLowering::visitGetElementPtr(User &I) { // If the index is smaller or larger than intptr_t, truncate or extend // it. 
- if (IdxN.getValueType().bitsLT(N.getValueType())) - IdxN = DAG.getNode(ISD::SIGN_EXTEND, getCurDebugLoc(), - N.getValueType(), IdxN); - else if (IdxN.getValueType().bitsGT(N.getValueType())) - IdxN = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), - N.getValueType(), IdxN); + IdxN = DAG.getSExtOrTrunc(IdxN, getCurDebugLoc(), N.getValueType()); // If this is a multiply by a power of two, turn it into a shl // immediately. This is a very common case. @@ -2749,13 +2715,8 @@ void SelectionDAGLowering::visitAlloca(AllocaInst &I) { - MVT IntPtr = TLI.getPointerTy(); - if (IntPtr.bitsLT(AllocSize.getValueType())) - AllocSize = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), - IntPtr, AllocSize); - else if (IntPtr.bitsGT(AllocSize.getValueType())) - AllocSize = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), - IntPtr, AllocSize); + EVT IntPtr = TLI.getPointerTy(); + AllocSize = DAG.getZExtOrTrunc(AllocSize, getCurDebugLoc(), IntPtr); // Handle alignment. If the requested alignment is less than or equal to // the stack alignment, ignore it. If the size is greater than or equal to @@ -2784,7 +2745,7 @@ void SelectionDAGLowering::visitAlloca(AllocaInst &I) { // Inform the Frame Information that we have just allocated a variable-sized // object. 
- CurMBB->getParent()->getFrameInfo()->CreateVariableSizedObject(); + FuncInfo.MF->getFrameInfo()->CreateVariableSizedObject(); } void SelectionDAGLowering::visitLoad(LoadInst &I) { @@ -2795,7 +2756,7 @@ void SelectionDAGLowering::visitLoad(LoadInst &I) { bool isVolatile = I.isVolatile(); unsigned Alignment = I.getAlignment(); - SmallVector<MVT, 4> ValueVTs; + SmallVector<EVT, 4> ValueVTs; SmallVector<uint64_t, 4> Offsets; ComputeValueVTs(TLI, Ty, ValueVTs, &Offsets); unsigned NumValues = ValueVTs.size(); @@ -2818,14 +2779,13 @@ void SelectionDAGLowering::visitLoad(LoadInst &I) { SmallVector<SDValue, 4> Values(NumValues); SmallVector<SDValue, 4> Chains(NumValues); - MVT PtrVT = Ptr.getValueType(); + EVT PtrVT = Ptr.getValueType(); for (unsigned i = 0; i != NumValues; ++i) { SDValue L = DAG.getLoad(ValueVTs[i], getCurDebugLoc(), Root, - DAG.getNode(ISD::ADD, getCurDebugLoc(), - PtrVT, Ptr, - DAG.getConstant(Offsets[i], PtrVT)), - SV, Offsets[i], - isVolatile, Alignment); + DAG.getNode(ISD::ADD, getCurDebugLoc(), + PtrVT, Ptr, + DAG.getConstant(Offsets[i], PtrVT)), + SV, Offsets[i], isVolatile, Alignment); Values[i] = L; Chains[i] = L.getValue(1); } @@ -2850,7 +2810,7 @@ void SelectionDAGLowering::visitStore(StoreInst &I) { Value *SrcV = I.getOperand(0); Value *PtrV = I.getOperand(1); - SmallVector<MVT, 4> ValueVTs; + SmallVector<EVT, 4> ValueVTs; SmallVector<uint64_t, 4> Offsets; ComputeValueVTs(TLI, SrcV->getType(), ValueVTs, &Offsets); unsigned NumValues = ValueVTs.size(); @@ -2865,7 +2825,7 @@ void SelectionDAGLowering::visitStore(StoreInst &I) { SDValue Root = getRoot(); SmallVector<SDValue, 4> Chains(NumValues); - MVT PtrVT = Ptr.getValueType(); + EVT PtrVT = Ptr.getValueType(); bool isVolatile = I.isVolatile(); unsigned Alignment = I.getAlignment(); for (unsigned i = 0; i != NumValues; ++i) @@ -2874,8 +2834,7 @@ void SelectionDAGLowering::visitStore(StoreInst &I) { DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT, Ptr, DAG.getConstant(Offsets[i], PtrVT)), - PtrV, 
Offsets[i], - isVolatile, Alignment); + PtrV, Offsets[i], isVolatile, Alignment); DAG.setRoot(DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other, &Chains[0], NumValues)); @@ -2915,24 +2874,18 @@ void SelectionDAGLowering::visitTargetIntrinsic(CallInst &I, Ops.push_back(Op); } - std::vector<MVT> VTArray; - if (I.getType() != Type::VoidTy) { - MVT VT = TLI.getValueType(I.getType()); - if (VT.isVector()) { - const VectorType *DestTy = cast<VectorType>(I.getType()); - MVT EltVT = TLI.getValueType(DestTy->getElementType()); - - VT = MVT::getVectorVT(EltVT, DestTy->getNumElements()); - assert(VT != MVT::Other && "Intrinsic uses a non-legal type?"); - } - - assert(TLI.isTypeLegal(VT) && "Intrinsic uses a non-legal type?"); - VTArray.push_back(VT); + SmallVector<EVT, 4> ValueVTs; + ComputeValueVTs(TLI, I.getType(), ValueVTs); +#ifndef NDEBUG + for (unsigned Val = 0, E = ValueVTs.size(); Val != E; ++Val) { + assert(TLI.isTypeLegal(ValueVTs[Val]) && + "Intrinsic uses a non-legal type?"); } +#endif // NDEBUG if (HasChain) - VTArray.push_back(MVT::Other); + ValueVTs.push_back(MVT::Other); - SDVTList VTs = DAG.getVTList(&VTArray[0], VTArray.size()); + SDVTList VTs = DAG.getVTList(ValueVTs.data(), ValueVTs.size()); // Create the node. 
SDValue Result; @@ -2947,7 +2900,7 @@ void SelectionDAGLowering::visitTargetIntrinsic(CallInst &I, else if (!HasChain) Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurDebugLoc(), VTs, &Ops[0], Ops.size()); - else if (I.getType() != Type::VoidTy) + else if (I.getType() != Type::getVoidTy(*DAG.getContext())) Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurDebugLoc(), VTs, &Ops[0], Ops.size()); else @@ -2961,9 +2914,9 @@ void SelectionDAGLowering::visitTargetIntrinsic(CallInst &I, else DAG.setRoot(Chain); } - if (I.getType() != Type::VoidTy) { + if (I.getType() != Type::getVoidTy(*DAG.getContext())) { if (const VectorType *PTy = dyn_cast<VectorType>(I.getType())) { - MVT VT = TLI.getValueType(PTy); + EVT VT = TLI.getValueType(PTy); Result = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(), VT, Result); } setValue(&I, Result); @@ -3890,7 +3843,7 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { if (isValidDebugInfoIntrinsic(RSI, OptLevel) && DW && DW->ShouldEmitDwarfDebug()) { unsigned LabelID = - DW->RecordRegionStart(cast<GlobalVariable>(RSI.getContext())); + DW->RecordRegionStart(RSI.getContext()); DAG.setRoot(DAG.getLabel(ISD::DBG_LABEL, getCurDebugLoc(), getRoot(), LabelID)); } @@ -3905,7 +3858,7 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { return 0; MachineFunction &MF = DAG.getMachineFunction(); - DISubprogram Subprogram(cast<GlobalVariable>(REI.getContext())); + DISubprogram Subprogram(REI.getContext()); if (isInlinedFnEnd(REI, MF.getFunction())) { // This is end of inlined function. 
Debugging information for inlined @@ -3924,7 +3877,7 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { } unsigned LabelID = - DW->RecordRegionEnd(cast<GlobalVariable>(REI.getContext())); + DW->RecordRegionEnd(REI.getContext()); DAG.setRoot(DAG.getLabel(ISD::DBG_LABEL, getCurDebugLoc(), getRoot(), LabelID)); return 0; @@ -3932,8 +3885,7 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { case Intrinsic::dbg_func_start: { DwarfWriter *DW = DAG.getDwarfWriter(); DbgFuncStartInst &FSI = cast<DbgFuncStartInst>(I); - if (!isValidDebugInfoIntrinsic(FSI, CodeGenOpt::None) || !DW - || !DW->ShouldEmitDwarfDebug()) + if (!isValidDebugInfoIntrinsic(FSI, CodeGenOpt::None)) return 0; MachineFunction &MF = DAG.getMachineFunction(); @@ -3954,9 +3906,11 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { // Record the source line. setCurDebugLoc(ExtractDebugLocation(FSI, MF.getDebugLocInfo())); + if (!DW || !DW->ShouldEmitDwarfDebug()) + return 0; DebugLocTuple PrevLocTpl = MF.getDebugLocTuple(PrevLoc); - DISubprogram SP(cast<GlobalVariable>(FSI.getSubprogram())); - DICompileUnit CU(PrevLocTpl.CompileUnit); + DISubprogram SP(FSI.getSubprogram()); + DICompileUnit CU(PrevLocTpl.Scope); unsigned LabelID = DW->RecordInlinedFnStart(SP, CU, PrevLocTpl.Line, PrevLocTpl.Col); @@ -3967,23 +3921,44 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { // This is a beginning of a new function. MF.setDefaultDebugLoc(ExtractDebugLocation(FSI, MF.getDebugLocInfo())); - + + if (!DW || !DW->ShouldEmitDwarfDebug()) + return 0; // llvm.dbg.func_start also defines beginning of function scope. - DW->RecordRegionStart(cast<GlobalVariable>(FSI.getSubprogram())); + DW->RecordRegionStart(FSI.getSubprogram()); return 0; } case Intrinsic::dbg_declare: { if (OptLevel != CodeGenOpt::None) // FIXME: Variable debug info is not supported here. 
return 0; - + DwarfWriter *DW = DAG.getDwarfWriter(); + if (!DW) + return 0; DbgDeclareInst &DI = cast<DbgDeclareInst>(I); if (!isValidDebugInfoIntrinsic(DI, CodeGenOpt::None)) return 0; - Value *Variable = DI.getVariable(); - DAG.setRoot(DAG.getNode(ISD::DECLARE, dl, MVT::Other, getRoot(), - getValue(DI.getAddress()), getValue(Variable))); + MDNode *Variable = DI.getVariable(); + Value *Address = DI.getAddress(); + if (BitCastInst *BCI = dyn_cast<BitCastInst>(Address)) + Address = BCI->getOperand(0); + AllocaInst *AI = dyn_cast<AllocaInst>(Address); + // Don't handle byval struct arguments or VLAs, for example. + if (!AI) + return 0; + DenseMap<const AllocaInst*, int>::iterator SI = + FuncInfo.StaticAllocaMap.find(AI); + if (SI == FuncInfo.StaticAllocaMap.end()) + return 0; // VLAs. + int FI = SI->second; +#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN + MachineModuleInfo *MMI = DAG.getMachineModuleInfo(); + if (MMI) + MMI->setVariableDbgInfo(Variable, FI); +#else + DW->RecordVariable(Variable, FI); +#endif return 0; } case Intrinsic::eh_exception: { @@ -3998,54 +3973,45 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { return 0; } - case Intrinsic::eh_selector_i32: - case Intrinsic::eh_selector_i64: { + case Intrinsic::eh_selector: { MachineModuleInfo *MMI = DAG.getMachineModuleInfo(); - MVT VT = (Intrinsic == Intrinsic::eh_selector_i32 ? - MVT::i32 : MVT::i64); - if (MMI) { - if (CurMBB->isLandingPad()) - AddCatchInfo(I, MMI, CurMBB); - else { + if (CurMBB->isLandingPad()) + AddCatchInfo(I, MMI, CurMBB); + else { #ifndef NDEBUG - FuncInfo.CatchInfoLost.insert(&I); + FuncInfo.CatchInfoLost.insert(&I); #endif - // FIXME: Mark exception selector register as live in. Hack for PR1508. - unsigned Reg = TLI.getExceptionSelectorRegister(); - if (Reg) CurMBB->addLiveIn(Reg); - } - - // Insert the EHSELECTION instruction. 
- SDVTList VTs = DAG.getVTList(VT, MVT::Other); - SDValue Ops[2]; - Ops[0] = getValue(I.getOperand(1)); - Ops[1] = getRoot(); - SDValue Op = DAG.getNode(ISD::EHSELECTION, dl, VTs, Ops, 2); - setValue(&I, Op); - DAG.setRoot(Op.getValue(1)); - } else { - setValue(&I, DAG.getConstant(0, VT)); + // FIXME: Mark exception selector register as live in. Hack for PR1508. + unsigned Reg = TLI.getExceptionSelectorRegister(); + if (Reg) CurMBB->addLiveIn(Reg); } + // Insert the EHSELECTION instruction. + SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other); + SDValue Ops[2]; + Ops[0] = getValue(I.getOperand(1)); + Ops[1] = getRoot(); + SDValue Op = DAG.getNode(ISD::EHSELECTION, dl, VTs, Ops, 2); + + DAG.setRoot(Op.getValue(1)); + + setValue(&I, DAG.getSExtOrTrunc(Op, dl, MVT::i32)); return 0; } - case Intrinsic::eh_typeid_for_i32: - case Intrinsic::eh_typeid_for_i64: { + case Intrinsic::eh_typeid_for: { MachineModuleInfo *MMI = DAG.getMachineModuleInfo(); - MVT VT = (Intrinsic == Intrinsic::eh_typeid_for_i32 ? - MVT::i32 : MVT::i64); if (MMI) { // Find the type id for the given typeinfo. GlobalVariable *GV = ExtractTypeInfo(I.getOperand(1)); unsigned TypeID = MMI->getTypeIDFor(GV); - setValue(&I, DAG.getConstant(TypeID, VT)); + setValue(&I, DAG.getConstant(TypeID, MVT::i32)); } else { // Return something different to eh_selector. 
- setValue(&I, DAG.getConstant(1, VT)); + setValue(&I, DAG.getConstant(1, MVT::i32)); } return 0; @@ -4073,14 +4039,9 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { return 0; case Intrinsic::eh_dwarf_cfa: { - MVT VT = getValue(I.getOperand(1)).getValueType(); - SDValue CfaArg; - if (VT.bitsGT(TLI.getPointerTy())) - CfaArg = DAG.getNode(ISD::TRUNCATE, dl, - TLI.getPointerTy(), getValue(I.getOperand(1))); - else - CfaArg = DAG.getNode(ISD::SIGN_EXTEND, dl, - TLI.getPointerTy(), getValue(I.getOperand(1))); + EVT VT = getValue(I.getOperand(1)).getValueType(); + SDValue CfaArg = DAG.getSExtOrTrunc(getValue(I.getOperand(1)), dl, + TLI.getPointerTy()); SDValue Offset = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), @@ -4096,7 +4057,6 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { Offset)); return 0; } - case Intrinsic::convertff: case Intrinsic::convertfsi: case Intrinsic::convertfui: @@ -4118,7 +4078,7 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { case Intrinsic::convertus: Code = ISD::CVT_US; break; case Intrinsic::convertuu: Code = ISD::CVT_UU; break; } - MVT DestVT = TLI.getValueType(I.getType()); + EVT DestVT = TLI.getValueType(I.getType()); Value* Op1 = I.getOperand(1); setValue(&I, DAG.getConvertRndSat(DestVT, getCurDebugLoc(), getValue(Op1), DAG.getValueType(DestVT), @@ -4182,16 +4142,6 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { DAG.setRoot(Tmp.getValue(1)); return 0; } - case Intrinsic::part_select: { - // Currently not implemented: just abort - assert(0 && "part_select intrinsic not implemented"); - abort(); - } - case Intrinsic::part_set: { - // Currently not implemented: just abort - assert(0 && "part_set intrinsic not implemented"); - abort(); - } case Intrinsic::bswap: setValue(&I, DAG.getNode(ISD::BSWAP, dl, getValue(I.getOperand(1)).getValueType(), @@ -4199,21 +4149,21 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, 
unsigned Intrinsic) { return 0; case Intrinsic::cttz: { SDValue Arg = getValue(I.getOperand(1)); - MVT Ty = Arg.getValueType(); + EVT Ty = Arg.getValueType(); SDValue result = DAG.getNode(ISD::CTTZ, dl, Ty, Arg); setValue(&I, result); return 0; } case Intrinsic::ctlz: { SDValue Arg = getValue(I.getOperand(1)); - MVT Ty = Arg.getValueType(); + EVT Ty = Arg.getValueType(); SDValue result = DAG.getNode(ISD::CTLZ, dl, Ty, Arg); setValue(&I, result); return 0; } case Intrinsic::ctpop: { SDValue Arg = getValue(I.getOperand(1)); - MVT Ty = Arg.getValueType(); + EVT Ty = Arg.getValueType(); SDValue result = DAG.getNode(ISD::CTPOP, dl, Ty, Arg); setValue(&I, result); return 0; @@ -4235,7 +4185,7 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { // Emit code into the DAG to store the stack guard onto the stack. MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); - MVT PtrTy = TLI.getPointerTy(); + EVT PtrTy = TLI.getPointerTy(); SDValue Src = getValue(I.getOperand(1)); // The guard's value. AllocaInst *Slot = cast<AllocaInst>(I.getOperand(2)); @@ -4289,7 +4239,7 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { case Intrinsic::gcread: case Intrinsic::gcwrite: - assert(0 && "GC failed to lower gcread/gcwrite intrinsics!"); + llvm_unreachable("GC failed to lower gcread/gcwrite intrinsics!"); return 0; case Intrinsic::flt_rounds: { @@ -4373,9 +4323,76 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { } } +/// Test if the given instruction is in a position to be optimized +/// with a tail-call. This roughly means that it's in a block with +/// a return and there's nothing that needs to be scheduled +/// between it and the return. +/// +/// This function only tests target-independent requirements. +/// For target-dependent requirements, a target should override +/// TargetLowering::IsEligibleForTailCallOptimization. 
+/// +static bool +isInTailCallPosition(const Instruction *I, Attributes RetAttr, + const TargetLowering &TLI) { + const BasicBlock *ExitBB = I->getParent(); + const TerminatorInst *Term = ExitBB->getTerminator(); + const ReturnInst *Ret = dyn_cast<ReturnInst>(Term); + const Function *F = ExitBB->getParent(); + + // The block must end in a return statement or an unreachable. + if (!Ret && !isa<UnreachableInst>(Term)) return false; + + // If I will have a chain, make sure no other instruction that will have a + // chain interposes between I and the return. + if (I->mayHaveSideEffects() || I->mayReadFromMemory() || + !I->isSafeToSpeculativelyExecute()) + for (BasicBlock::const_iterator BBI = prior(prior(ExitBB->end())); ; + --BBI) { + if (&*BBI == I) + break; + if (BBI->mayHaveSideEffects() || BBI->mayReadFromMemory() || + !BBI->isSafeToSpeculativelyExecute()) + return false; + } + + // If the block ends with a void return or unreachable, it doesn't matter + // what the call's return type is. + if (!Ret || Ret->getNumOperands() == 0) return true; + + // Conservatively require the attributes of the call to match those of + // the return. + if (F->getAttributes().getRetAttributes() != RetAttr) + return false; + + // Otherwise, make sure the unmodified return value of I is the return value. + for (const Instruction *U = dyn_cast<Instruction>(Ret->getOperand(0)); ; + U = dyn_cast<Instruction>(U->getOperand(0))) { + if (!U) + return false; + if (!U->hasOneUse()) + return false; + if (U == I) + break; + // Check for a truly no-op truncate. + if (isa<TruncInst>(U) && + TLI.isTruncateFree(U->getOperand(0)->getType(), U->getType())) + continue; + // Check for a truly no-op bitcast. + if (isa<BitCastInst>(U) && + (U->getOperand(0)->getType() == U->getType() || + (isa<PointerType>(U->getOperand(0)->getType()) && + isa<PointerType>(U->getType())))) + continue; + // Otherwise it's not a true no-op. 
+ return false; + } + + return true; +} void SelectionDAGLowering::LowerCallTo(CallSite CS, SDValue Callee, - bool IsTailCall, + bool isTailCall, MachineBasicBlock *LandingPad) { const PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType()); const FunctionType *FTy = cast<FunctionType>(PT->getElementType()); @@ -4385,8 +4402,9 @@ void SelectionDAGLowering::LowerCallTo(CallSite CS, SDValue Callee, TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; Args.reserve(CS.arg_size()); + unsigned j = 1; for (CallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); - i != e; ++i) { + i != e; ++i, ++j) { SDValue ArgNode = getValue(*i); Entry.Node = ArgNode; Entry.Ty = (*i)->getType(); @@ -4405,6 +4423,7 @@ void SelectionDAGLowering::LowerCallTo(CallSite CS, SDValue Callee, // Insert a label before the invoke call to mark the try range. This can be // used to detect deletion of the invoke via the MachineModuleInfo. BeginLabel = MMI->NextLabelID(); + // Both PendingLoads and PendingExports must be flushed here; // this call might not return. (void)getRoot(); @@ -4412,17 +4431,35 @@ void SelectionDAGLowering::LowerCallTo(CallSite CS, SDValue Callee, getControlRoot(), BeginLabel)); } + // Check if target-independent constraints permit a tail call here. + // Target-dependent constraints are checked within TLI.LowerCallTo. 
+ if (isTailCall && + !isInTailCallPosition(CS.getInstruction(), + CS.getAttributes().getRetAttributes(), + TLI)) + isTailCall = false; + std::pair<SDValue,SDValue> Result = TLI.LowerCallTo(getRoot(), CS.getType(), CS.paramHasAttr(0, Attribute::SExt), CS.paramHasAttr(0, Attribute::ZExt), FTy->isVarArg(), CS.paramHasAttr(0, Attribute::InReg), FTy->getNumParams(), CS.getCallingConv(), - IsTailCall && PerformTailCallOpt, + isTailCall, + !CS.getInstruction()->use_empty(), Callee, Args, DAG, getCurDebugLoc()); - if (CS.getType() != Type::VoidTy) + assert((isTailCall || Result.second.getNode()) && + "Non-null chain expected with non-tail call!"); + assert((Result.second.getNode() || !Result.first.getNode()) && + "Null value expected with tail call!"); + if (Result.first.getNode()) setValue(CS.getInstruction(), Result.first); - DAG.setRoot(Result.second); + // As a special case, a null chain means that a tail call has + // been emitted and the DAG root is already updated. + if (Result.second.getNode()) + DAG.setRoot(Result.second); + else + HasTailCall = true; if (LandingPad && MMI) { // Insert a label at the end of the invoke call to mark the try range. This @@ -4458,12 +4495,9 @@ void SelectionDAGLowering::visitCall(CallInst &I) { // Check for well-known libc/libm calls. If the function is internal, it // can't be a library call. - unsigned NameLen = F->getNameLen(); - if (!F->hasLocalLinkage() && NameLen) { - const char *NameStr = F->getNameStart(); - if (NameStr[0] == 'c' && - ((NameLen == 8 && !strcmp(NameStr, "copysign")) || - (NameLen == 9 && !strcmp(NameStr, "copysignf")))) { + if (!F->hasLocalLinkage() && F->hasName()) { + StringRef Name = F->getName(); + if (Name == "copysign" || Name == "copysignf") { if (I.getNumOperands() == 3 && // Basic sanity checks. 
I.getOperand(1)->getType()->isFloatingPoint() && I.getType() == I.getOperand(1)->getType() && @@ -4474,10 +4508,7 @@ void SelectionDAGLowering::visitCall(CallInst &I) { LHS.getValueType(), LHS, RHS)); return; } - } else if (NameStr[0] == 'f' && - ((NameLen == 4 && !strcmp(NameStr, "fabs")) || - (NameLen == 5 && !strcmp(NameStr, "fabsf")) || - (NameLen == 5 && !strcmp(NameStr, "fabsl")))) { + } else if (Name == "fabs" || Name == "fabsf" || Name == "fabsl") { if (I.getNumOperands() == 2 && // Basic sanity checks. I.getOperand(1)->getType()->isFloatingPoint() && I.getType() == I.getOperand(1)->getType()) { @@ -4486,30 +4517,36 @@ void SelectionDAGLowering::visitCall(CallInst &I) { Tmp.getValueType(), Tmp)); return; } - } else if (NameStr[0] == 's' && - ((NameLen == 3 && !strcmp(NameStr, "sin")) || - (NameLen == 4 && !strcmp(NameStr, "sinf")) || - (NameLen == 4 && !strcmp(NameStr, "sinl")))) { + } else if (Name == "sin" || Name == "sinf" || Name == "sinl") { if (I.getNumOperands() == 2 && // Basic sanity checks. I.getOperand(1)->getType()->isFloatingPoint() && - I.getType() == I.getOperand(1)->getType()) { + I.getType() == I.getOperand(1)->getType() && + I.onlyReadsMemory()) { SDValue Tmp = getValue(I.getOperand(1)); setValue(&I, DAG.getNode(ISD::FSIN, getCurDebugLoc(), Tmp.getValueType(), Tmp)); return; } - } else if (NameStr[0] == 'c' && - ((NameLen == 3 && !strcmp(NameStr, "cos")) || - (NameLen == 4 && !strcmp(NameStr, "cosf")) || - (NameLen == 4 && !strcmp(NameStr, "cosl")))) { + } else if (Name == "cos" || Name == "cosf" || Name == "cosl") { if (I.getNumOperands() == 2 && // Basic sanity checks. 
I.getOperand(1)->getType()->isFloatingPoint() && - I.getType() == I.getOperand(1)->getType()) { + I.getType() == I.getOperand(1)->getType() && + I.onlyReadsMemory()) { SDValue Tmp = getValue(I.getOperand(1)); setValue(&I, DAG.getNode(ISD::FCOS, getCurDebugLoc(), Tmp.getValueType(), Tmp)); return; } + } else if (Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl") { + if (I.getNumOperands() == 2 && // Basic sanity checks. + I.getOperand(1)->getType()->isFloatingPoint() && + I.getType() == I.getOperand(1)->getType() && + I.onlyReadsMemory()) { + SDValue Tmp = getValue(I.getOperand(1)); + setValue(&I, DAG.getNode(ISD::FSQRT, getCurDebugLoc(), + Tmp.getValueType(), Tmp)); + return; + } } } } else if (isa<InlineAsm>(I.getOperand(0))) { @@ -4523,7 +4560,12 @@ void SelectionDAGLowering::visitCall(CallInst &I) { else Callee = DAG.getExternalSymbol(RenameFn, TLI.getPointerTy()); - LowerCallTo(&I, Callee, I.isTailCall()); + // Check if we can potentially perform a tail call. More detailed + // checking is be done within LowerCallTo, after more information + // about the call is known. + bool isTailCall = PerformTailCallOpt && I.isTailCall(); + + LowerCallTo(&I, Callee, isTailCall); } @@ -4539,9 +4581,9 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, DebugLoc dl, SmallVector<SDValue, 8> Parts; for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) { // Copy the legal parts from the registers. - MVT ValueVT = ValueVTs[Value]; - unsigned NumRegs = TLI->getNumRegisters(ValueVT); - MVT RegisterVT = RegVTs[Value]; + EVT ValueVT = ValueVTs[Value]; + unsigned NumRegs = TLI->getNumRegisters(*DAG.getContext(), ValueVT); + EVT RegisterVT = RegVTs[Value]; Parts.resize(NumRegs); for (unsigned i = 0; i != NumRegs; ++i) { @@ -4570,7 +4612,7 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, DebugLoc dl, // FIXME: We capture more information than the dag can represent. For // now, just use the tightest assertzext/assertsext possible. 
bool isSExt = true; - MVT FromVT(MVT::Other); + EVT FromVT(MVT::Other); if (NumSignBits == RegSize) isSExt = true, FromVT = MVT::i1; // ASSERT SEXT 1 else if (NumZeroBits >= RegSize-1) @@ -4620,9 +4662,9 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl, unsigned NumRegs = Regs.size(); SmallVector<SDValue, 8> Parts(NumRegs); for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) { - MVT ValueVT = ValueVTs[Value]; - unsigned NumParts = TLI->getNumRegisters(ValueVT); - MVT RegisterVT = RegVTs[Value]; + EVT ValueVT = ValueVTs[Value]; + unsigned NumParts = TLI->getNumRegisters(*DAG.getContext(), ValueVT); + EVT RegisterVT = RegVTs[Value]; getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value), &Parts[Part], NumParts, RegisterVT); @@ -4665,15 +4707,15 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,unsigned MatchingIdx, SelectionDAG &DAG, std::vector<SDValue> &Ops) const { - MVT IntPtrTy = DAG.getTargetLoweringInfo().getPointerTy(); + EVT IntPtrTy = DAG.getTargetLoweringInfo().getPointerTy(); assert(Regs.size() < (1 << 13) && "Too many inline asm outputs!"); unsigned Flag = Code | (Regs.size() << 3); if (HasMatching) Flag |= 0x80000000 | (MatchingIdx << 16); Ops.push_back(DAG.getTargetConstant(Flag, IntPtrTy)); for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) { - unsigned NumRegs = TLI->getNumRegisters(ValueVTs[Value]); - MVT RegisterVT = RegVTs[Value]; + unsigned NumRegs = TLI->getNumRegisters(*DAG.getContext(), ValueVTs[Value]); + EVT RegisterVT = RegVTs[Value]; for (unsigned i = 0; i != NumRegs; ++i) { assert(Reg < Regs.size() && "Mismatch in # registers expected"); Ops.push_back(DAG.getRegister(Regs[Reg++], RegisterVT)); @@ -4688,11 +4730,11 @@ static const TargetRegisterClass * isAllocatableRegister(unsigned Reg, MachineFunction &MF, const TargetLowering &TLI, const TargetRegisterInfo *TRI) { - MVT FoundVT = MVT::Other; + EVT FoundVT = MVT::Other; const 
TargetRegisterClass *FoundRC = 0; for (TargetRegisterInfo::regclass_iterator RCI = TRI->regclass_begin(), E = TRI->regclass_end(); RCI != E; ++RCI) { - MVT ThisVT = MVT::Other; + EVT ThisVT = MVT::Other; const TargetRegisterClass *RC = *RCI; // If none of the the value types for this register class are valid, we @@ -4765,10 +4807,11 @@ public: } } - /// getCallOperandValMVT - Return the MVT of the Value* that this operand + /// getCallOperandValEVT - Return the EVT of the Value* that this operand /// corresponds to. If there is no Value* for this operand, it returns /// MVT::Other. - MVT getCallOperandValMVT(const TargetLowering &TLI, + EVT getCallOperandValEVT(LLVMContext &Context, + const TargetLowering &TLI, const TargetData *TD) const { if (CallOperandVal == 0) return MVT::Other; @@ -4794,7 +4837,7 @@ public: case 32: case 64: case 128: - OpTy = IntegerType::get(BitSize); + OpTy = IntegerType::get(Context, BitSize); break; } } @@ -4830,6 +4873,8 @@ void SelectionDAGLowering:: GetRegistersForValue(SDISelAsmOperandInfo &OpInfo, std::set<unsigned> &OutputRegs, std::set<unsigned> &InputRegs) { + LLVMContext &Context = FuncInfo.Fn->getContext(); + // Compute whether this value requires an input register, an output register, // or both. bool isOutReg = false; @@ -4869,10 +4914,10 @@ GetRegistersForValue(SDISelAsmOperandInfo &OpInfo, // value disagrees with the register class we plan to stick this in. if (OpInfo.Type == InlineAsm::isInput && PhysReg.second && !PhysReg.second->hasType(OpInfo.ConstraintVT)) { - // Try to convert to the first MVT that the reg class contains. If the + // Try to convert to the first EVT that the reg class contains. If the // types are identical size, use a bitcast to convert (e.g. two differing // vector types). 
- MVT RegVT = *PhysReg.second->vt_begin(); + EVT RegVT = *PhysReg.second->vt_begin(); if (RegVT.getSizeInBits() == OpInfo.ConstraintVT.getSizeInBits()) { OpInfo.CallOperand = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(), RegVT, OpInfo.CallOperand); @@ -4882,18 +4927,19 @@ GetRegistersForValue(SDISelAsmOperandInfo &OpInfo, // bitcast to the corresponding integer type. This turns an f64 value // into i64, which can be passed with two i32 values on a 32-bit // machine. - RegVT = MVT::getIntegerVT(OpInfo.ConstraintVT.getSizeInBits()); + RegVT = EVT::getIntegerVT(Context, + OpInfo.ConstraintVT.getSizeInBits()); OpInfo.CallOperand = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(), RegVT, OpInfo.CallOperand); OpInfo.ConstraintVT = RegVT; } } - NumRegs = TLI.getNumRegisters(OpInfo.ConstraintVT); + NumRegs = TLI.getNumRegisters(Context, OpInfo.ConstraintVT); } - MVT RegVT; - MVT ValueVT = OpInfo.ConstraintVT; + EVT RegVT; + EVT ValueVT = OpInfo.ConstraintVT; // If this is a constraint for a specific physical register, like {r17}, // assign it now. @@ -5047,7 +5093,7 @@ void SelectionDAGLowering::visitInlineAsm(CallSite CS) { ConstraintOperands.push_back(SDISelAsmOperandInfo(ConstraintInfos[i])); SDISelAsmOperandInfo &OpInfo = ConstraintOperands.back(); - MVT OpVT = MVT::Other; + EVT OpVT = MVT::Other; // Compute the value type for each operand. switch (OpInfo.Type) { @@ -5060,7 +5106,8 @@ void SelectionDAGLowering::visitInlineAsm(CallSite CS) { // The return value of the call is this value. As such, there is no // corresponding argument. - assert(CS.getType() != Type::VoidTy && "Bad inline asm!"); + assert(CS.getType() != Type::getVoidTy(*DAG.getContext()) && + "Bad inline asm!"); if (const StructType *STy = dyn_cast<StructType>(CS.getType())) { OpVT = TLI.getValueType(STy->getElementType(ResNo)); } else { @@ -5080,13 +5127,16 @@ void SelectionDAGLowering::visitInlineAsm(CallSite CS) { // If this is an input or an indirect output, process the call argument. 
// BasicBlocks are labels, currently appearing only in asm's. if (OpInfo.CallOperandVal) { + // Strip bitcasts, if any. This mostly comes up for functions. + OpInfo.CallOperandVal = OpInfo.CallOperandVal->stripPointerCasts(); + if (BasicBlock *BB = dyn_cast<BasicBlock>(OpInfo.CallOperandVal)) { OpInfo.CallOperand = DAG.getBasicBlock(FuncInfo.MBBMap[BB]); } else { OpInfo.CallOperand = getValue(OpInfo.CallOperandVal); } - OpVT = OpInfo.getCallOperandValMVT(TLI, TD); + OpVT = OpInfo.getCallOperandValEVT(*DAG.getContext(), TLI, TD); } OpInfo.ConstraintVT = OpVT; @@ -5108,9 +5158,9 @@ void SelectionDAGLowering::visitInlineAsm(CallSite CS) { Input.ConstraintVT.isInteger()) || (OpInfo.ConstraintVT.getSizeInBits() != Input.ConstraintVT.getSizeInBits())) { - cerr << "llvm: error: Unsupported asm: input constraint with a " - << "matching output constraint of incompatible type!\n"; - exit(1); + llvm_report_error("Unsupported asm: input constraint" + " with a matching output constraint of incompatible" + " type!"); } Input.ConstraintVT = OpInfo.ConstraintVT; } @@ -5213,9 +5263,8 @@ void SelectionDAGLowering::visitInlineAsm(CallSite CS) { // Copy the output from the appropriate register. Find a register that // we can use. if (OpInfo.AssignedRegs.Regs.empty()) { - cerr << "llvm: error: Couldn't allocate output reg for constraint '" - << OpInfo.ConstraintCode << "'!\n"; - exit(1); + llvm_report_error("Couldn't allocate output reg for" + " constraint '" + OpInfo.ConstraintCode + "'!"); } // If this is an indirect operand, store through the pointer after the @@ -5225,7 +5274,8 @@ void SelectionDAGLowering::visitInlineAsm(CallSite CS) { OpInfo.CallOperandVal)); } else { // This is the result value of the call. - assert(CS.getType() != Type::VoidTy && "Bad inline asm!"); + assert(CS.getType() != Type::getVoidTy(*DAG.getContext()) && + "Bad inline asm!"); // Concatenate this output onto the outputs list. 
RetValRegs.append(OpInfo.AssignedRegs); } @@ -5268,15 +5318,13 @@ void SelectionDAGLowering::visitInlineAsm(CallSite CS) { || (OpFlag & 7) == 6 /* EARLYCLOBBER REGDEF */) { // Add (OpFlag&0xffff)>>3 registers to MatchedRegs. if (OpInfo.isIndirect) { - cerr << "llvm: error: " - "Don't know how to handle tied indirect " - "register inputs yet!\n"; - exit(1); + llvm_report_error("Don't know how to handle tied indirect " + "register inputs yet!"); } RegsForValue MatchedRegs; MatchedRegs.TLI = &TLI; MatchedRegs.ValueVTs.push_back(InOperandVal.getValueType()); - MVT RegVT = AsmNodeOperands[CurOp+1].getValueType(); + EVT RegVT = AsmNodeOperands[CurOp+1].getValueType(); MatchedRegs.RegVTs.push_back(RegVT); MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo(); for (unsigned i = 0, e = InlineAsm::getNumOperandRegisters(OpFlag); @@ -5313,9 +5361,8 @@ void SelectionDAGLowering::visitInlineAsm(CallSite CS) { TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode[0], hasMemory, Ops, DAG); if (Ops.empty()) { - cerr << "llvm: error: Invalid operand for inline asm constraint '" - << OpInfo.ConstraintCode << "'!\n"; - exit(1); + llvm_report_error("Invalid operand for inline asm" + " constraint '" + OpInfo.ConstraintCode + "'!"); } // Add information to the INLINEASM node to know about this input. @@ -5345,9 +5392,8 @@ void SelectionDAGLowering::visitInlineAsm(CallSite CS) { // Copy the input into the appropriate registers. if (OpInfo.AssignedRegs.Regs.empty()) { - cerr << "llvm: error: Couldn't allocate output reg for constraint '" - << OpInfo.ConstraintCode << "'!\n"; - exit(1); + llvm_report_error("Couldn't allocate input reg for" + " constraint '"+ OpInfo.ConstraintCode +"'!"); } OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(), @@ -5385,7 +5431,7 @@ void SelectionDAGLowering::visitInlineAsm(CallSite CS) { // FIXME: Why don't we do this for inline asms with MRVs? 
if (CS.getType()->isSingleValueType() && CS.getType()->isSized()) { - MVT ResultType = TLI.getValueType(CS.getType()); + EVT ResultType = TLI.getValueType(CS.getType()); // If any of the results of the inline asm is a vector, it may have the // wrong width/num elts. This can happen for register classes that can @@ -5449,45 +5495,56 @@ void SelectionDAGLowering::visitMalloc(MallocInst &I) { // multiply on 64-bit targets. // FIXME: Malloc inst should go away: PR715. uint64_t ElementSize = TD->getTypeAllocSize(I.getType()->getElementType()); - if (ElementSize != 1) + if (ElementSize != 1) { + // Src is always 32-bits, make sure the constant fits. + assert(Src.getValueType() == MVT::i32); + ElementSize = (uint32_t)ElementSize; Src = DAG.getNode(ISD::MUL, getCurDebugLoc(), Src.getValueType(), Src, DAG.getConstant(ElementSize, Src.getValueType())); + } - MVT IntPtr = TLI.getPointerTy(); + EVT IntPtr = TLI.getPointerTy(); - if (IntPtr.bitsLT(Src.getValueType())) - Src = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), IntPtr, Src); - else if (IntPtr.bitsGT(Src.getValueType())) - Src = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), IntPtr, Src); + Src = DAG.getZExtOrTrunc(Src, getCurDebugLoc(), IntPtr); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; Entry.Node = Src; - Entry.Ty = TLI.getTargetData()->getIntPtrType(); + Entry.Ty = TLI.getTargetData()->getIntPtrType(*DAG.getContext()); Args.push_back(Entry); + bool isTailCall = PerformTailCallOpt && + isInTailCallPosition(&I, Attribute::None, TLI); std::pair<SDValue,SDValue> Result = TLI.LowerCallTo(getRoot(), I.getType(), false, false, false, false, - 0, CallingConv::C, PerformTailCallOpt, + 0, CallingConv::C, isTailCall, + /*isReturnValueUsed=*/true, DAG.getExternalSymbol("malloc", IntPtr), Args, DAG, getCurDebugLoc()); - setValue(&I, Result.first); // Pointers always fit in registers - DAG.setRoot(Result.second); + if (Result.first.getNode()) + setValue(&I, Result.first); // Pointers always fit in 
registers + if (Result.second.getNode()) + DAG.setRoot(Result.second); } void SelectionDAGLowering::visitFree(FreeInst &I) { TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; Entry.Node = getValue(I.getOperand(0)); - Entry.Ty = TLI.getTargetData()->getIntPtrType(); + Entry.Ty = TLI.getTargetData()->getIntPtrType(*DAG.getContext()); Args.push_back(Entry); - MVT IntPtr = TLI.getPointerTy(); + EVT IntPtr = TLI.getPointerTy(); + bool isTailCall = PerformTailCallOpt && + isInTailCallPosition(&I, Attribute::None, TLI); std::pair<SDValue,SDValue> Result = - TLI.LowerCallTo(getRoot(), Type::VoidTy, false, false, false, false, - 0, CallingConv::C, PerformTailCallOpt, + TLI.LowerCallTo(getRoot(), Type::getVoidTy(*DAG.getContext()), + false, false, false, false, + 0, CallingConv::C, isTailCall, + /*isReturnValueUsed=*/true, DAG.getExternalSymbol("free", IntPtr), Args, DAG, getCurDebugLoc()); - DAG.setRoot(Result.second); + if (Result.second.getNode()) + DAG.setRoot(Result.second); } void SelectionDAGLowering::visitVAStart(CallInst &I) { @@ -5521,161 +5578,31 @@ void SelectionDAGLowering::visitVACopy(CallInst &I) { DAG.getSrcValue(I.getOperand(2)))); } -/// TargetLowering::LowerArguments - This is the default LowerArguments -/// implementation, which just inserts a FORMAL_ARGUMENTS node. FIXME: When all -/// targets are migrated to using FORMAL_ARGUMENTS, this hook should be -/// integrated into SDISel. -void TargetLowering::LowerArguments(Function &F, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &ArgValues, - DebugLoc dl) { - // Add CC# and isVararg as operands to the FORMAL_ARGUMENTS node. - SmallVector<SDValue, 3+16> Ops; - Ops.push_back(DAG.getRoot()); - Ops.push_back(DAG.getConstant(F.getCallingConv(), getPointerTy())); - Ops.push_back(DAG.getConstant(F.isVarArg(), getPointerTy())); - - // Add one result value for each formal argument. 
- SmallVector<MVT, 16> RetVals; - unsigned j = 1; - for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); - I != E; ++I, ++j) { - SmallVector<MVT, 4> ValueVTs; - ComputeValueVTs(*this, I->getType(), ValueVTs); - for (unsigned Value = 0, NumValues = ValueVTs.size(); - Value != NumValues; ++Value) { - MVT VT = ValueVTs[Value]; - const Type *ArgTy = VT.getTypeForMVT(); - ISD::ArgFlagsTy Flags; - unsigned OriginalAlignment = - getTargetData()->getABITypeAlignment(ArgTy); - - if (F.paramHasAttr(j, Attribute::ZExt)) - Flags.setZExt(); - if (F.paramHasAttr(j, Attribute::SExt)) - Flags.setSExt(); - if (F.paramHasAttr(j, Attribute::InReg)) - Flags.setInReg(); - if (F.paramHasAttr(j, Attribute::StructRet)) - Flags.setSRet(); - if (F.paramHasAttr(j, Attribute::ByVal)) { - Flags.setByVal(); - const PointerType *Ty = cast<PointerType>(I->getType()); - const Type *ElementTy = Ty->getElementType(); - unsigned FrameAlign = getByValTypeAlignment(ElementTy); - unsigned FrameSize = getTargetData()->getTypeAllocSize(ElementTy); - // For ByVal, alignment should be passed from FE. BE will guess if - // this info is not there but there are cases it cannot get right. - if (F.getParamAlignment(j)) - FrameAlign = F.getParamAlignment(j); - Flags.setByValAlign(FrameAlign); - Flags.setByValSize(FrameSize); - } - if (F.paramHasAttr(j, Attribute::Nest)) - Flags.setNest(); - Flags.setOrigAlign(OriginalAlignment); - - MVT RegisterVT = getRegisterType(VT); - unsigned NumRegs = getNumRegisters(VT); - for (unsigned i = 0; i != NumRegs; ++i) { - RetVals.push_back(RegisterVT); - ISD::ArgFlagsTy MyFlags = Flags; - if (NumRegs > 1 && i == 0) - MyFlags.setSplit(); - // if it isn't first piece, alignment must be 1 - else if (i > 0) - MyFlags.setOrigAlign(1); - Ops.push_back(DAG.getArgFlags(MyFlags)); - } - } - } - - RetVals.push_back(MVT::Other); - - // Create the node. 
- SDNode *Result = DAG.getNode(ISD::FORMAL_ARGUMENTS, dl, - DAG.getVTList(&RetVals[0], RetVals.size()), - &Ops[0], Ops.size()).getNode(); - - // Prelower FORMAL_ARGUMENTS. This isn't required for functionality, but - // allows exposing the loads that may be part of the argument access to the - // first DAGCombiner pass. - SDValue TmpRes = LowerOperation(SDValue(Result, 0), DAG); - - // The number of results should match up, except that the lowered one may have - // an extra flag result. - assert((Result->getNumValues() == TmpRes.getNode()->getNumValues() || - (Result->getNumValues()+1 == TmpRes.getNode()->getNumValues() && - TmpRes.getValue(Result->getNumValues()).getValueType() == MVT::Flag)) - && "Lowering produced unexpected number of results!"); - - // The FORMAL_ARGUMENTS node itself is likely no longer needed. - if (Result != TmpRes.getNode() && Result->use_empty()) { - HandleSDNode Dummy(DAG.getRoot()); - DAG.RemoveDeadNode(Result); - } - - Result = TmpRes.getNode(); - - unsigned NumArgRegs = Result->getNumValues() - 1; - DAG.setRoot(SDValue(Result, NumArgRegs)); - - // Set up the return result vector. 
- unsigned i = 0; - unsigned Idx = 1; - for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; - ++I, ++Idx) { - SmallVector<MVT, 4> ValueVTs; - ComputeValueVTs(*this, I->getType(), ValueVTs); - for (unsigned Value = 0, NumValues = ValueVTs.size(); - Value != NumValues; ++Value) { - MVT VT = ValueVTs[Value]; - MVT PartVT = getRegisterType(VT); - - unsigned NumParts = getNumRegisters(VT); - SmallVector<SDValue, 4> Parts(NumParts); - for (unsigned j = 0; j != NumParts; ++j) - Parts[j] = SDValue(Result, i++); - - ISD::NodeType AssertOp = ISD::DELETED_NODE; - if (F.paramHasAttr(Idx, Attribute::SExt)) - AssertOp = ISD::AssertSext; - else if (F.paramHasAttr(Idx, Attribute::ZExt)) - AssertOp = ISD::AssertZext; - - ArgValues.push_back(getCopyFromParts(DAG, dl, &Parts[0], NumParts, - PartVT, VT, AssertOp)); - } - } - assert(i == NumArgRegs && "Argument register count mismatch!"); -} - - /// TargetLowering::LowerCallTo - This is the default LowerCallTo -/// implementation, which just inserts an ISD::CALL node, which is later custom -/// lowered by the target to something concrete. FIXME: When all targets are -/// migrated to using ISD::CALL, this hook should be integrated into SDISel. +/// implementation, which just calls LowerCall. +/// FIXME: When all targets are +/// migrated to using LowerCall, this hook should be integrated into SDISel. std::pair<SDValue, SDValue> TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy, bool RetSExt, bool RetZExt, bool isVarArg, bool isInreg, unsigned NumFixedArgs, - unsigned CallingConv, bool isTailCall, + CallingConv::ID CallConv, bool isTailCall, + bool isReturnValueUsed, SDValue Callee, ArgListTy &Args, SelectionDAG &DAG, DebugLoc dl) { + assert((!isTailCall || PerformTailCallOpt) && "isTailCall set when tail-call optimizations are disabled!"); - SmallVector<SDValue, 32> Ops; - Ops.push_back(Chain); // Op#0 - Chain - Ops.push_back(Callee); - // Handle all of the outgoing arguments. 
+ SmallVector<ISD::OutputArg, 32> Outs; for (unsigned i = 0, e = Args.size(); i != e; ++i) { - SmallVector<MVT, 4> ValueVTs; + SmallVector<EVT, 4> ValueVTs; ComputeValueVTs(*this, Args[i].Ty, ValueVTs); for (unsigned Value = 0, NumValues = ValueVTs.size(); Value != NumValues; ++Value) { - MVT VT = ValueVTs[Value]; - const Type *ArgTy = VT.getTypeForMVT(); + EVT VT = ValueVTs[Value]; + const Type *ArgTy = VT.getTypeForEVT(RetTy->getContext()); SDValue Op = SDValue(Args[i].Node.getNode(), Args[i].Node.getResNo() + Value); ISD::ArgFlagsTy Flags; @@ -5707,8 +5634,8 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy, Flags.setNest(); Flags.setOrigAlign(OriginalAlignment); - MVT PartVT = getRegisterType(VT); - unsigned NumParts = getNumRegisters(VT); + EVT PartVT = getRegisterType(RetTy->getContext(), VT); + unsigned NumParts = getNumRegisters(RetTy->getContext(), VT); SmallVector<SDValue, 4> Parts(NumParts); ISD::NodeType ExtendKind = ISD::ANY_EXTEND; @@ -5719,75 +5646,105 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy, getCopyToParts(DAG, dl, Op, &Parts[0], NumParts, PartVT, ExtendKind); - for (unsigned i = 0; i != NumParts; ++i) { + for (unsigned j = 0; j != NumParts; ++j) { // if it isn't first piece, alignment must be 1 - ISD::ArgFlagsTy MyFlags = Flags; - if (NumParts > 1 && i == 0) - MyFlags.setSplit(); - else if (i != 0) - MyFlags.setOrigAlign(1); - - Ops.push_back(Parts[i]); - Ops.push_back(DAG.getArgFlags(MyFlags)); + ISD::OutputArg MyFlags(Flags, Parts[j], i < NumFixedArgs); + if (NumParts > 1 && j == 0) + MyFlags.Flags.setSplit(); + else if (j != 0) + MyFlags.Flags.setOrigAlign(1); + + Outs.push_back(MyFlags); } } } - // Figure out the result value types. We start by making a list of - // the potentially illegal return value types. - SmallVector<MVT, 4> LoweredRetTys; - SmallVector<MVT, 4> RetTys; + // Handle the incoming return values from the call. 
+ SmallVector<ISD::InputArg, 32> Ins; + SmallVector<EVT, 4> RetTys; ComputeValueVTs(*this, RetTy, RetTys); - - // Then we translate that to a list of legal types. for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { - MVT VT = RetTys[I]; - MVT RegisterVT = getRegisterType(VT); - unsigned NumRegs = getNumRegisters(VT); - for (unsigned i = 0; i != NumRegs; ++i) - LoweredRetTys.push_back(RegisterVT); - } - - LoweredRetTys.push_back(MVT::Other); // Always has a chain. - - // Create the CALL node. - SDValue Res = DAG.getCall(CallingConv, dl, - isVarArg, isTailCall, isInreg, - DAG.getVTList(&LoweredRetTys[0], - LoweredRetTys.size()), - &Ops[0], Ops.size(), NumFixedArgs - ); - Chain = Res.getValue(LoweredRetTys.size() - 1); - - // Gather up the call result into a single value. - if (RetTy != Type::VoidTy && !RetTys.empty()) { - ISD::NodeType AssertOp = ISD::DELETED_NODE; - - if (RetSExt) - AssertOp = ISD::AssertSext; - else if (RetZExt) - AssertOp = ISD::AssertZext; - - SmallVector<SDValue, 4> ReturnValues; - unsigned RegNo = 0; - for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { - MVT VT = RetTys[I]; - MVT RegisterVT = getRegisterType(VT); - unsigned NumRegs = getNumRegisters(VT); - unsigned RegNoEnd = NumRegs + RegNo; - SmallVector<SDValue, 4> Results; - for (; RegNo != RegNoEnd; ++RegNo) - Results.push_back(Res.getValue(RegNo)); - SDValue ReturnValue = - getCopyFromParts(DAG, dl, &Results[0], NumRegs, RegisterVT, VT, - AssertOp); - ReturnValues.push_back(ReturnValue); + EVT VT = RetTys[I]; + EVT RegisterVT = getRegisterType(RetTy->getContext(), VT); + unsigned NumRegs = getNumRegisters(RetTy->getContext(), VT); + for (unsigned i = 0; i != NumRegs; ++i) { + ISD::InputArg MyFlags; + MyFlags.VT = RegisterVT; + MyFlags.Used = isReturnValueUsed; + if (RetSExt) + MyFlags.Flags.setSExt(); + if (RetZExt) + MyFlags.Flags.setZExt(); + if (isInreg) + MyFlags.Flags.setInReg(); + Ins.push_back(MyFlags); } - Res = DAG.getNode(ISD::MERGE_VALUES, dl, - DAG.getVTList(&RetTys[0], 
RetTys.size()), - &ReturnValues[0], ReturnValues.size()); } + // Check if target-dependent constraints permit a tail call here. + // Target-independent constraints should be checked by the caller. + if (isTailCall && + !IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg, Ins, DAG)) + isTailCall = false; + + SmallVector<SDValue, 4> InVals; + Chain = LowerCall(Chain, Callee, CallConv, isVarArg, isTailCall, + Outs, Ins, dl, DAG, InVals); + + // Verify that the target's LowerCall behaved as expected. + assert(Chain.getNode() && Chain.getValueType() == MVT::Other && + "LowerCall didn't return a valid chain!"); + assert((!isTailCall || InVals.empty()) && + "LowerCall emitted a return value for a tail call!"); + assert((isTailCall || InVals.size() == Ins.size()) && + "LowerCall didn't emit the correct number of values!"); + DEBUG(for (unsigned i = 0, e = Ins.size(); i != e; ++i) { + assert(InVals[i].getNode() && + "LowerCall emitted a null value!"); + assert(Ins[i].VT == InVals[i].getValueType() && + "LowerCall emitted a value with the wrong type!"); + }); + + // For a tail call, the return value is merely live-out and there aren't + // any nodes in the DAG representing it. Return a special value to + // indicate that a tail call has been emitted and no more Instructions + // should be processed in the current block. + if (isTailCall) { + DAG.setRoot(Chain); + return std::make_pair(SDValue(), SDValue()); + } + + // Collect the legal value parts into potentially illegal values + // that correspond to the original function's return values. 
+ ISD::NodeType AssertOp = ISD::DELETED_NODE; + if (RetSExt) + AssertOp = ISD::AssertSext; + else if (RetZExt) + AssertOp = ISD::AssertZext; + SmallVector<SDValue, 4> ReturnValues; + unsigned CurReg = 0; + for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { + EVT VT = RetTys[I]; + EVT RegisterVT = getRegisterType(RetTy->getContext(), VT); + unsigned NumRegs = getNumRegisters(RetTy->getContext(), VT); + + SDValue ReturnValue = + getCopyFromParts(DAG, dl, &InVals[CurReg], NumRegs, RegisterVT, VT, + AssertOp); + ReturnValues.push_back(ReturnValue); + CurReg += NumRegs; + } + + // For a function returning void, there is no return value. We can't create + // such a node, so we just return a null return value in that case. In + // that case, nothing will actualy look at the value. + if (ReturnValues.empty()) + return std::make_pair(SDValue(), Chain); + + SDValue Res = DAG.getNode(ISD::MERGE_VALUES, dl, + DAG.getVTList(&RetTys[0], RetTys.size()), + &ReturnValues[0], ReturnValues.size()); + return std::make_pair(Res, Chain); } @@ -5800,8 +5757,7 @@ void TargetLowering::LowerOperationWrapper(SDNode *N, } SDValue TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { - assert(0 && "LowerOperation not implemented for this target!"); - abort(); + llvm_unreachable("LowerOperation not implemented for this target!"); return SDValue(); } @@ -5813,7 +5769,7 @@ void SelectionDAGLowering::CopyValueToVirtualRegister(Value *V, unsigned Reg) { "Copy from a reg to the same reg!"); assert(!TargetRegisterInfo::isPhysicalRegister(Reg) && "Is a physreg"); - RegsForValue RFV(TLI, Reg, V->getType()); + RegsForValue RFV(V->getContext(), TLI, Reg, V->getType()); SDValue Chain = DAG.getEntryNode(); RFV.getCopyToRegs(Op, DAG, getCurDebugLoc(), Chain, 0); PendingExports.push_back(Chain); @@ -5825,25 +5781,122 @@ void SelectionDAGISel:: LowerArguments(BasicBlock *LLVMBB) { // If this is the entry block, emit arguments. 
Function &F = *LLVMBB->getParent(); - SDValue OldRoot = SDL->DAG.getRoot(); - SmallVector<SDValue, 16> Args; - TLI.LowerArguments(F, SDL->DAG, Args, SDL->getCurDebugLoc()); - - unsigned a = 0; - for (Function::arg_iterator AI = F.arg_begin(), E = F.arg_end(); - AI != E; ++AI) { - SmallVector<MVT, 4> ValueVTs; - ComputeValueVTs(TLI, AI->getType(), ValueVTs); + SelectionDAG &DAG = SDL->DAG; + SDValue OldRoot = DAG.getRoot(); + DebugLoc dl = SDL->getCurDebugLoc(); + const TargetData *TD = TLI.getTargetData(); + + // Set up the incoming argument description vector. + SmallVector<ISD::InputArg, 16> Ins; + unsigned Idx = 1; + for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); + I != E; ++I, ++Idx) { + SmallVector<EVT, 4> ValueVTs; + ComputeValueVTs(TLI, I->getType(), ValueVTs); + bool isArgValueUsed = !I->use_empty(); + for (unsigned Value = 0, NumValues = ValueVTs.size(); + Value != NumValues; ++Value) { + EVT VT = ValueVTs[Value]; + const Type *ArgTy = VT.getTypeForEVT(*DAG.getContext()); + ISD::ArgFlagsTy Flags; + unsigned OriginalAlignment = + TD->getABITypeAlignment(ArgTy); + + if (F.paramHasAttr(Idx, Attribute::ZExt)) + Flags.setZExt(); + if (F.paramHasAttr(Idx, Attribute::SExt)) + Flags.setSExt(); + if (F.paramHasAttr(Idx, Attribute::InReg)) + Flags.setInReg(); + if (F.paramHasAttr(Idx, Attribute::StructRet)) + Flags.setSRet(); + if (F.paramHasAttr(Idx, Attribute::ByVal)) { + Flags.setByVal(); + const PointerType *Ty = cast<PointerType>(I->getType()); + const Type *ElementTy = Ty->getElementType(); + unsigned FrameAlign = TLI.getByValTypeAlignment(ElementTy); + unsigned FrameSize = TD->getTypeAllocSize(ElementTy); + // For ByVal, alignment should be passed from FE. BE will guess if + // this info is not there but there are cases it cannot get right. 
+ if (F.getParamAlignment(Idx)) + FrameAlign = F.getParamAlignment(Idx); + Flags.setByValAlign(FrameAlign); + Flags.setByValSize(FrameSize); + } + if (F.paramHasAttr(Idx, Attribute::Nest)) + Flags.setNest(); + Flags.setOrigAlign(OriginalAlignment); + + EVT RegisterVT = TLI.getRegisterType(*CurDAG->getContext(), VT); + unsigned NumRegs = TLI.getNumRegisters(*CurDAG->getContext(), VT); + for (unsigned i = 0; i != NumRegs; ++i) { + ISD::InputArg MyFlags(Flags, RegisterVT, isArgValueUsed); + if (NumRegs > 1 && i == 0) + MyFlags.Flags.setSplit(); + // if it isn't first piece, alignment must be 1 + else if (i > 0) + MyFlags.Flags.setOrigAlign(1); + Ins.push_back(MyFlags); + } + } + } + + // Call the target to set up the argument values. + SmallVector<SDValue, 8> InVals; + SDValue NewRoot = TLI.LowerFormalArguments(DAG.getRoot(), F.getCallingConv(), + F.isVarArg(), Ins, + dl, DAG, InVals); + + // Verify that the target's LowerFormalArguments behaved as expected. + assert(NewRoot.getNode() && NewRoot.getValueType() == MVT::Other && + "LowerFormalArguments didn't return a valid chain!"); + assert(InVals.size() == Ins.size() && + "LowerFormalArguments didn't emit the correct number of values!"); + DEBUG(for (unsigned i = 0, e = Ins.size(); i != e; ++i) { + assert(InVals[i].getNode() && + "LowerFormalArguments emitted a null value!"); + assert(Ins[i].VT == InVals[i].getValueType() && + "LowerFormalArguments emitted a value with the wrong type!"); + }); + + // Update the DAG with the new chain value resulting from argument lowering. + DAG.setRoot(NewRoot); + + // Set up the argument values. 
+ unsigned i = 0; + Idx = 1; + for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; + ++I, ++Idx) { + SmallVector<SDValue, 4> ArgValues; + SmallVector<EVT, 4> ValueVTs; + ComputeValueVTs(TLI, I->getType(), ValueVTs); unsigned NumValues = ValueVTs.size(); - if (!AI->use_empty()) { - SDL->setValue(AI, SDL->DAG.getMergeValues(&Args[a], NumValues, - SDL->getCurDebugLoc())); + for (unsigned Value = 0; Value != NumValues; ++Value) { + EVT VT = ValueVTs[Value]; + EVT PartVT = TLI.getRegisterType(*CurDAG->getContext(), VT); + unsigned NumParts = TLI.getNumRegisters(*CurDAG->getContext(), VT); + + if (!I->use_empty()) { + ISD::NodeType AssertOp = ISD::DELETED_NODE; + if (F.paramHasAttr(Idx, Attribute::SExt)) + AssertOp = ISD::AssertSext; + else if (F.paramHasAttr(Idx, Attribute::ZExt)) + AssertOp = ISD::AssertZext; + + ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i], NumParts, + PartVT, VT, AssertOp)); + } + i += NumParts; + } + if (!I->use_empty()) { + SDL->setValue(I, DAG.getMergeValues(&ArgValues[0], NumValues, + SDL->getCurDebugLoc())); // If this argument is live outside of the entry block, insert a copy from // whereever we got it to the vreg that other BB's will reference it as. - SDL->CopyToExportRegsIfNeeded(AI); + SDL->CopyToExportRegsIfNeeded(I); } - a += NumValues; } + assert(i == InVals.size() && "Argument register count mismatch!"); // Finally, if the target has anything special to do, allow it to do so. // FIXME: this should insert code into the DAG! @@ -5908,11 +5961,11 @@ SelectionDAGISel::HandlePHINodesInSuccessorBlocks(BasicBlock *LLVMBB) { // Remember that this register needs to added to the machine PHI node as // the input for this MBB. 
- SmallVector<MVT, 4> ValueVTs; + SmallVector<EVT, 4> ValueVTs; ComputeValueVTs(TLI, PN->getType(), ValueVTs); for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) { - MVT VT = ValueVTs[vti]; - unsigned NumRegisters = TLI.getNumRegisters(VT); + EVT VT = ValueVTs[vti]; + unsigned NumRegisters = TLI.getNumRegisters(*CurDAG->getContext(), VT); for (unsigned i = 0, e = NumRegisters; i != e; ++i) SDL->PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg+i)); Reg += NumRegisters; @@ -5962,11 +6015,11 @@ SelectionDAGISel::HandlePHINodesInSuccessorBlocksFast(BasicBlock *LLVMBB, // own moves. Second, this check is necessary becuase FastISel doesn't // use CreateRegForValue to create registers, so it always creates // exactly one register for each non-void instruction. - MVT VT = TLI.getValueType(PN->getType(), /*AllowUnknown=*/true); + EVT VT = TLI.getValueType(PN->getType(), /*AllowUnknown=*/true); if (VT == MVT::Other || !TLI.isTypeLegal(VT)) { // Promote MVT::i1. if (VT == MVT::i1) - VT = TLI.getTypeToTransformTo(VT); + VT = TLI.getTypeToTransformTo(*CurDAG->getContext(), VT); else { SDL->PHINodesToUpdate.resize(OrigNumPHINodesToUpdate); return false; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h index 057c841..06acc8a 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h @@ -15,6 +15,7 @@ #define SELECTIONDAGBUILD_H #include "llvm/Constants.h" +#include "llvm/CodeGen/SelectionDAG.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/DenseMap.h" #ifndef NDEBUG @@ -23,6 +24,7 @@ #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/Support/CallSite.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetMachine.h" #include <vector> #include <set> @@ -75,8 +77,6 @@ class TruncInst; class UIToFPInst; class UnreachableInst; class UnwindInst; -class VICmpInst; -class VFCmpInst; class VAArgInst; 
class ZExtInst; @@ -117,7 +117,7 @@ public: SmallSet<Instruction*, 8> CatchInfoFound; #endif - unsigned MakeReg(MVT VT); + unsigned MakeReg(EVT VT); /// isExportedInst - Return true if the specified value is an instruction /// exported from its block. @@ -345,9 +345,15 @@ public: /// BitTestCases - Vector of BitTestBlock structures used to communicate /// SwitchInst code generation information. std::vector<BitTestBlock> BitTestCases; - + + /// PHINodesToUpdate - A list of phi instructions whose operand list will + /// be updated after processing the current basic block. std::vector<std::pair<MachineInstr*, unsigned> > PHINodesToUpdate; + /// EdgeMapping - If an edge from CurMBB to any MBB is changed (e.g. due to + /// scheduler custom lowering), track the change here. + DenseMap<MachineBasicBlock*, MachineBasicBlock*> EdgeMapping; + // Emit PHI-node-operand constants only once even if used by multiple // PHI nodes. DenseMap<Constant*, unsigned> ConstantsOut; @@ -363,11 +369,21 @@ public: /// GFI - Garbage collection metadata for the function. GCFunctionInfo *GFI; + /// HasTailCall - This is set to true if a call in the current + /// block has been translated as a tail call. In this case, + /// no subsequent DAG nodes should be created. 
+ /// + bool HasTailCall; + + LLVMContext *Context; + SelectionDAGLowering(SelectionDAG &dag, TargetLowering &tli, FunctionLoweringInfo &funcinfo, CodeGenOpt::Level ol) : CurDebugLoc(DebugLoc::getUnknownLoc()), - TLI(tli), DAG(dag), FuncInfo(funcinfo), OptLevel(ol) { + TLI(tli), DAG(dag), FuncInfo(funcinfo), OptLevel(ol), + HasTailCall(false), + Context(dag.getContext()) { } void init(GCFunctionInfo *gfi, AliasAnalysis &aa); @@ -489,8 +505,6 @@ private: void visitAShr(User &I) { visitShift(I, ISD::SRA); } void visitICmp(User &I); void visitFCmp(User &I); - void visitVICmp(User &I); - void visitVFCmp(User &I); // Visit the conversion instructions void visitTrunc(User &I); void visitZExt(User &I); @@ -539,12 +553,10 @@ private: void visitVACopy(CallInst &I); void visitUserOp1(Instruction &I) { - assert(0 && "UserOp1 should not exist at instruction selection time!"); - abort(); + llvm_unreachable("UserOp1 should not exist at instruction selection time!"); } void visitUserOp2(Instruction &I) { - assert(0 && "UserOp2 should not exist at instruction selection time!"); - abort(); + llvm_unreachable("UserOp2 should not exist at instruction selection time!"); } const char *implVisitBinaryAtomic(CallInst& I, ISD::NodeType Op); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 9d72a12..ae98da5 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -16,6 +16,7 @@ #include "SelectionDAGBuild.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/DebugInfo.h" #include "llvm/Constants.h" #include "llvm/CallingConv.h" #include "llvm/DerivedTypes.h" @@ -29,6 +30,7 @@ #include "llvm/CodeGen/GCStrategy.h" #include "llvm/CodeGen/GCMetadata.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionAnalysis.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include 
"llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" @@ -47,8 +49,10 @@ #include "llvm/Target/TargetOptions.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/Timer.h" +#include "llvm/Support/raw_ostream.h" #include <algorithm> using namespace llvm; @@ -150,12 +154,15 @@ namespace llvm { // insert. The specified MachineInstr is created but not inserted into any // basic blocks, and the scheduler passes ownership of it to this method. MachineBasicBlock *TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *MBB) const { - cerr << "If a target marks an instruction with " - << "'usesCustomDAGSchedInserter', it must implement " - << "TargetLowering::EmitInstrWithCustomInserter!\n"; - abort(); - return 0; + MachineBasicBlock *MBB, + DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const { +#ifndef NDEBUG + errs() << "If a target marks an instruction with " + "'usesCustomDAGSchedInserter', it must implement " + "TargetLowering::EmitInstrWithCustomInserter!"; +#endif + llvm_unreachable(0); + return 0; } /// EmitLiveInCopy - Emit a copy for a live in physical register. 
If the @@ -215,8 +222,11 @@ static void EmitLiveInCopy(MachineBasicBlock *MBB, --Pos; } - TII.copyRegToReg(*MBB, Pos, VirtReg, PhysReg, RC, RC); - CopyRegMap.insert(std::make_pair(prior(Pos), VirtReg)); + bool Emitted = TII.copyRegToReg(*MBB, Pos, VirtReg, PhysReg, RC, RC); + assert(Emitted && "Unable to issue a live-in copy instruction!\n"); + (void) Emitted; + +CopyRegMap.insert(std::make_pair(prior(Pos), VirtReg)); if (Coalesced) { if (&*InsertPos == UseMI) ++InsertPos; MBB->erase(UseMI); @@ -247,8 +257,10 @@ static void EmitLiveInCopies(MachineBasicBlock *EntryMBB, E = MRI.livein_end(); LI != E; ++LI) if (LI->second) { const TargetRegisterClass *RC = MRI.getRegClass(LI->second); - TII.copyRegToReg(*EntryMBB, EntryMBB->begin(), - LI->second, LI->first, RC, RC); + bool Emitted = TII.copyRegToReg(*EntryMBB, EntryMBB->begin(), + LI->second, LI->first, RC, RC); + assert(Emitted && "Unable to issue a live-in copy instruction!\n"); + (void) Emitted; } } } @@ -258,7 +270,7 @@ static void EmitLiveInCopies(MachineBasicBlock *EntryMBB, //===----------------------------------------------------------------------===// SelectionDAGISel::SelectionDAGISel(TargetMachine &tm, CodeGenOpt::Level OL) : - FunctionPass(&ID), TM(tm), TLI(*tm.getTargetLowering()), + MachineFunctionPass(&ID), TM(tm), TLI(*tm.getTargetLowering()), FuncInfo(new FunctionLoweringInfo(TLI)), CurDAG(new SelectionDAG(TLI, *FuncInfo)), SDL(new SelectionDAGLowering(*CurDAG, TLI, *FuncInfo, OL)), @@ -273,44 +285,42 @@ SelectionDAGISel::~SelectionDAGISel() { delete FuncInfo; } -unsigned SelectionDAGISel::MakeReg(MVT VT) { +unsigned SelectionDAGISel::MakeReg(EVT VT) { return RegInfo->createVirtualRegister(TLI.getRegClassFor(VT)); } void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<AliasAnalysis>(); + AU.addPreserved<AliasAnalysis>(); AU.addRequired<GCModuleInfo>(); + AU.addPreserved<GCModuleInfo>(); AU.addRequired<DwarfWriter>(); - AU.setPreservesAll(); + 
AU.addPreserved<DwarfWriter>(); + MachineFunctionPass::getAnalysisUsage(AU); } -bool SelectionDAGISel::runOnFunction(Function &Fn) { +bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { + Function &Fn = *mf.getFunction(); + // Do some sanity-checking on the command-line options. assert((!EnableFastISelVerbose || EnableFastISel) && "-fast-isel-verbose requires -fast-isel"); assert((!EnableFastISelAbort || EnableFastISel) && "-fast-isel-abort requires -fast-isel"); - // Do not codegen any 'available_externally' functions at all, they have - // definitions outside the translation unit. - if (Fn.hasAvailableExternallyLinkage()) - return false; - - // Get alias analysis for load/store combining. AA = &getAnalysis<AliasAnalysis>(); - TargetMachine &TM = TLI.getTargetMachine(); - MF = &MachineFunction::construct(&Fn, TM); + MF = &mf; const TargetInstrInfo &TII = *TM.getInstrInfo(); const TargetRegisterInfo &TRI = *TM.getRegisterInfo(); - if (MF->getFunction()->hasGC()) - GFI = &getAnalysis<GCModuleInfo>().getFunctionInfo(*MF->getFunction()); + if (Fn.hasGC()) + GFI = &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn); else GFI = 0; RegInfo = &MF->getRegInfo(); - DOUT << "\n\n\n=== " << Fn.getName() << "\n"; + DEBUG(errs() << "\n\n\n=== " << Fn.getName() << "\n"); MachineModuleInfo *MMI = getAnalysisIfAvailable<MachineModuleInfo>(); DwarfWriter *DW = getAnalysisIfAvailable<DwarfWriter>(); @@ -358,140 +368,50 @@ static void copyCatchInfo(BasicBlock *SrcBB, BasicBlock *DestBB, } } -/// IsFixedFrameObjectWithPosOffset - Check if object is a fixed frame object and -/// whether object offset >= 0. 
-static bool -IsFixedFrameObjectWithPosOffset(MachineFrameInfo *MFI, SDValue Op) { - if (!isa<FrameIndexSDNode>(Op)) return false; - - FrameIndexSDNode * FrameIdxNode = dyn_cast<FrameIndexSDNode>(Op); - int FrameIdx = FrameIdxNode->getIndex(); - return MFI->isFixedObjectIndex(FrameIdx) && - MFI->getObjectOffset(FrameIdx) >= 0; -} - -/// IsPossiblyOverwrittenArgumentOfTailCall - Check if the operand could -/// possibly be overwritten when lowering the outgoing arguments in a tail -/// call. Currently the implementation of this call is very conservative and -/// assumes all arguments sourcing from FORMAL_ARGUMENTS or a CopyFromReg with -/// virtual registers would be overwritten by direct lowering. -static bool IsPossiblyOverwrittenArgumentOfTailCall(SDValue Op, - MachineFrameInfo *MFI) { - RegisterSDNode * OpReg = NULL; - if (Op.getOpcode() == ISD::FORMAL_ARGUMENTS || - (Op.getOpcode()== ISD::CopyFromReg && - (OpReg = dyn_cast<RegisterSDNode>(Op.getOperand(1))) && - (OpReg->getReg() >= TargetRegisterInfo::FirstVirtualRegister)) || - (Op.getOpcode() == ISD::LOAD && - IsFixedFrameObjectWithPosOffset(MFI, Op.getOperand(1))) || - (Op.getOpcode() == ISD::MERGE_VALUES && - Op.getOperand(Op.getResNo()).getOpcode() == ISD::LOAD && - IsFixedFrameObjectWithPosOffset(MFI, Op.getOperand(Op.getResNo()). - getOperand(1)))) - return true; - return false; -} - -/// CheckDAGForTailCallsAndFixThem - This Function looks for CALL nodes in the -/// DAG and fixes their tailcall attribute operand. -static void CheckDAGForTailCallsAndFixThem(SelectionDAG &DAG, - const TargetLowering& TLI) { - SDNode * Ret = NULL; - SDValue Terminator = DAG.getRoot(); - - // Find RET node. - if (Terminator.getOpcode() == ISD::RET) { - Ret = Terminator.getNode(); - } - - // Fix tail call attribute of CALL nodes. 
- for (SelectionDAG::allnodes_iterator BE = DAG.allnodes_begin(), - BI = DAG.allnodes_end(); BI != BE; ) { - --BI; - if (CallSDNode *TheCall = dyn_cast<CallSDNode>(BI)) { - SDValue OpRet(Ret, 0); - SDValue OpCall(BI, 0); - bool isMarkedTailCall = TheCall->isTailCall(); - // If CALL node has tail call attribute set to true and the call is not - // eligible (no RET or the target rejects) the attribute is fixed to - // false. The TargetLowering::IsEligibleForTailCallOptimization function - // must correctly identify tail call optimizable calls. - if (!isMarkedTailCall) continue; - if (Ret==NULL || - !TLI.IsEligibleForTailCallOptimization(TheCall, OpRet, DAG)) { - // Not eligible. Mark CALL node as non tail call. Note that we - // can modify the call node in place since calls are not CSE'd. - TheCall->setNotTailCall(); - } else { - // Look for tail call clobbered arguments. Emit a series of - // copyto/copyfrom virtual register nodes to protect them. - SmallVector<SDValue, 32> Ops; - SDValue Chain = TheCall->getChain(), InFlag; - Ops.push_back(Chain); - Ops.push_back(TheCall->getCallee()); - for (unsigned i = 0, e = TheCall->getNumArgs(); i != e; ++i) { - SDValue Arg = TheCall->getArg(i); - bool isByVal = TheCall->getArgFlags(i).isByVal(); - MachineFunction &MF = DAG.getMachineFunction(); - MachineFrameInfo *MFI = MF.getFrameInfo(); - if (!isByVal && - IsPossiblyOverwrittenArgumentOfTailCall(Arg, MFI)) { - MVT VT = Arg.getValueType(); - unsigned VReg = MF.getRegInfo(). - createVirtualRegister(TLI.getRegClassFor(VT)); - Chain = DAG.getCopyToReg(Chain, Arg.getDebugLoc(), - VReg, Arg, InFlag); - InFlag = Chain.getValue(1); - Arg = DAG.getCopyFromReg(Chain, Arg.getDebugLoc(), - VReg, VT, InFlag); - Chain = Arg.getValue(1); - InFlag = Arg.getValue(2); - } - Ops.push_back(Arg); - Ops.push_back(TheCall->getArgFlagsVal(i)); - } - // Link in chain of CopyTo/CopyFromReg. 
- Ops[0] = Chain; - DAG.UpdateNodeOperands(OpCall, Ops.begin(), Ops.size()); - } - } - } -} - void SelectionDAGISel::SelectBasicBlock(BasicBlock *LLVMBB, BasicBlock::iterator Begin, BasicBlock::iterator End) { SDL->setCurrentBasicBlock(BB); - - // Lower all of the non-terminator instructions. - for (BasicBlock::iterator I = Begin; I != End; ++I) + MetadataContext &TheMetadata = LLVMBB->getParent()->getContext().getMetadata(); + unsigned MDDbgKind = TheMetadata.getMDKind("dbg"); + + // Lower all of the non-terminator instructions. If a call is emitted + // as a tail call, cease emitting nodes for this block. + for (BasicBlock::iterator I = Begin; I != End && !SDL->HasTailCall; ++I) { + if (MDDbgKind) { + // Update DebugLoc if debug information is attached with this + // instruction. + if (MDNode *Dbg = TheMetadata.getMD(MDDbgKind, I)) { + DILocation DILoc(Dbg); + DebugLoc Loc = ExtractDebugLocation(DILoc, MF->getDebugLocInfo()); + SDL->setCurDebugLoc(Loc); + if (MF->getDefaultDebugLoc().isUnknown()) + MF->setDefaultDebugLoc(Loc); + } + } if (!isa<TerminatorInst>(I)) SDL->visit(*I); + } - // Ensure that all instructions which are used outside of their defining - // blocks are available as virtual registers. Invoke is handled elsewhere. - for (BasicBlock::iterator I = Begin; I != End; ++I) - if (!isa<PHINode>(I) && !isa<InvokeInst>(I)) - SDL->CopyToExportRegsIfNeeded(I); + if (!SDL->HasTailCall) { + // Ensure that all instructions which are used outside of their defining + // blocks are available as virtual registers. Invoke is handled elsewhere. + for (BasicBlock::iterator I = Begin; I != End; ++I) + if (!isa<PHINode>(I) && !isa<InvokeInst>(I)) + SDL->CopyToExportRegsIfNeeded(I); - // Handle PHI nodes in successor blocks. - if (End == LLVMBB->end()) { - HandlePHINodesInSuccessorBlocks(LLVMBB); + // Handle PHI nodes in successor blocks. + if (End == LLVMBB->end()) { + HandlePHINodesInSuccessorBlocks(LLVMBB); - // Lower the terminator after the copies are emitted. 
- SDL->visit(*LLVMBB->getTerminator()); + // Lower the terminator after the copies are emitted. + SDL->visit(*LLVMBB->getTerminator()); + } } - + // Make sure the root of the DAG is up-to-date. CurDAG->setRoot(SDL->getControlRoot()); - // Check whether calls in this block are real tail calls. Fix up CALL nodes - // with correct tailcall attribute so that the target can rely on the tailcall - // attribute indicating whether the call is really eligible for tail call - // optimization. - if (PerformTailCallOpt) - CheckDAGForTailCallsAndFixThem(*CurDAG, TLI); - // Final step, emit the lowered DAG as machine code. CodeGenAndEmitDAG(); SDL->clear(); @@ -500,51 +420,51 @@ void SelectionDAGISel::SelectBasicBlock(BasicBlock *LLVMBB, void SelectionDAGISel::ComputeLiveOutVRegInfo() { SmallPtrSet<SDNode*, 128> VisitedNodes; SmallVector<SDNode*, 128> Worklist; - + Worklist.push_back(CurDAG->getRoot().getNode()); - + APInt Mask; APInt KnownZero; APInt KnownOne; - + while (!Worklist.empty()) { SDNode *N = Worklist.back(); Worklist.pop_back(); - + // If we've already seen this node, ignore it. if (!VisitedNodes.insert(N)) continue; - + // Otherwise, add all chain operands to the worklist. for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) if (N->getOperand(i).getValueType() == MVT::Other) Worklist.push_back(N->getOperand(i).getNode()); - + // If this is a CopyToReg with a vreg dest, process it. if (N->getOpcode() != ISD::CopyToReg) continue; - + unsigned DestReg = cast<RegisterSDNode>(N->getOperand(1))->getReg(); if (!TargetRegisterInfo::isVirtualRegister(DestReg)) continue; - + // Ignore non-scalar or non-integer values. 
SDValue Src = N->getOperand(2); - MVT SrcVT = Src.getValueType(); + EVT SrcVT = Src.getValueType(); if (!SrcVT.isInteger() || SrcVT.isVector()) continue; - + unsigned NumSignBits = CurDAG->ComputeNumSignBits(Src); Mask = APInt::getAllOnesValue(SrcVT.getSizeInBits()); CurDAG->ComputeMaskedBits(Src, Mask, KnownZero, KnownOne); - + // Only install this information if it tells us something. if (NumSignBits != 1 || KnownZero != 0 || KnownOne != 0) { DestReg -= TargetRegisterInfo::FirstVirtualRegister; - FunctionLoweringInfo &FLI = CurDAG->getFunctionLoweringInfo(); - if (DestReg >= FLI.LiveOutRegInfo.size()) - FLI.LiveOutRegInfo.resize(DestReg+1); - FunctionLoweringInfo::LiveOutInfo &LOI = FLI.LiveOutRegInfo[DestReg]; + if (DestReg >= FuncInfo->LiveOutRegInfo.size()) + FuncInfo->LiveOutRegInfo.resize(DestReg+1); + FunctionLoweringInfo::LiveOutInfo &LOI = + FuncInfo->LiveOutRegInfo[DestReg]; LOI.NumSignBits = NumSignBits; LOI.KnownOne = KnownOne; LOI.KnownZero = KnownZero; @@ -560,10 +480,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { if (ViewDAGCombine1 || ViewLegalizeTypesDAGs || ViewLegalizeDAGs || ViewDAGCombine2 || ViewDAGCombineLT || ViewISelDAGs || ViewSchedDAGs || ViewSUnitDAGs) - BlockName = CurDAG->getMachineFunction().getFunction()->getName() + ':' + - BB->getBasicBlock()->getName(); + BlockName = MF->getFunction()->getNameStr() + ":" + + BB->getBasicBlock()->getNameStr(); - DOUT << "Initial selection DAG:\n"; + DEBUG(errs() << "Initial selection DAG:\n"); DEBUG(CurDAG->dump()); if (ViewDAGCombine1) CurDAG->viewGraph("dag-combine1 input for " + BlockName); @@ -575,10 +495,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { } else { CurDAG->Combine(Unrestricted, *AA, OptLevel); } - - DOUT << "Optimized lowered selection DAG:\n"; + + DEBUG(errs() << "Optimized lowered selection DAG:\n"); DEBUG(CurDAG->dump()); - + // Second step, hack on the DAG until it only uses operations and types that // the target supports. 
if (!DisableLegalizeTypes) { @@ -593,7 +513,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { Changed = CurDAG->LegalizeTypes(); } - DOUT << "Type-legalized selection DAG:\n"; + DEBUG(errs() << "Type-legalized selection DAG:\n"); DEBUG(CurDAG->dump()); if (Changed) { @@ -608,7 +528,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { CurDAG->Combine(NoIllegalTypes, *AA, OptLevel); } - DOUT << "Optimized type-legalized selection DAG:\n"; + DEBUG(errs() << "Optimized type-legalized selection DAG:\n"); DEBUG(CurDAG->dump()); } @@ -638,11 +558,11 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { CurDAG->Combine(NoIllegalOperations, *AA, OptLevel); } - DOUT << "Optimized vector-legalized selection DAG:\n"; + DEBUG(errs() << "Optimized vector-legalized selection DAG:\n"); DEBUG(CurDAG->dump()); } } - + if (ViewLegalizeDAGs) CurDAG->viewGraph("legalize input for " + BlockName); if (TimePassesIsEnabled) { @@ -651,10 +571,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { } else { CurDAG->Legalize(DisableLegalizeTypes, OptLevel); } - - DOUT << "Legalized selection DAG:\n"; + + DEBUG(errs() << "Legalized selection DAG:\n"); DEBUG(CurDAG->dump()); - + if (ViewDAGCombine2) CurDAG->viewGraph("dag-combine2 input for " + BlockName); // Run the DAG combiner in post-legalize mode. 
@@ -664,12 +584,12 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { } else { CurDAG->Combine(NoIllegalOperations, *AA, OptLevel); } - - DOUT << "Optimized legalized selection DAG:\n"; + + DEBUG(errs() << "Optimized legalized selection DAG:\n"); DEBUG(CurDAG->dump()); if (ViewISelDAGs) CurDAG->viewGraph("isel input for " + BlockName); - + if (OptLevel != CodeGenOpt::None) ComputeLiveOutVRegInfo(); @@ -682,7 +602,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { InstructionSelect(); } - DOUT << "Selected selection DAG:\n"; + DEBUG(errs() << "Selected selection DAG:\n"); DEBUG(CurDAG->dump()); if (ViewSchedDAGs) CurDAG->viewGraph("scheduler input for " + BlockName); @@ -698,13 +618,13 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { if (ViewSUnitDAGs) Scheduler->viewGraph(); - // Emit machine code to BB. This can change 'BB' to the last block being + // Emit machine code to BB. This can change 'BB' to the last block being // inserted into. if (TimePassesIsEnabled) { NamedRegionTimer T("Instruction Creation", GroupName); - BB = Scheduler->EmitSchedule(); + BB = Scheduler->EmitSchedule(&SDL->EdgeMapping); } else { - BB = Scheduler->EmitSchedule(); + BB = Scheduler->EmitSchedule(&SDL->EdgeMapping); } // Free the scheduler state. @@ -715,9 +635,9 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { delete Scheduler; } - DOUT << "Selected machine code:\n"; + DEBUG(errs() << "Selected machine code:\n"); DEBUG(BB->dump()); -} +} void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn, MachineFunction &MF, @@ -736,6 +656,9 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn, #endif ); + MetadataContext &TheMetadata = Fn.getContext().getMetadata(); + unsigned MDDbgKind = TheMetadata.getMDKind("dbg"); + // Iterate over all basic blocks in the function. 
for (Function::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) { BasicBlock *LLVMBB = &*I; @@ -758,7 +681,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn, I != E; ++I, ++j) if (Fn.paramHasAttr(j, Attribute::ByVal)) { if (EnableFastISelVerbose || EnableFastISelAbort) - cerr << "FastISel skips entry block due to byval argument\n"; + errs() << "FastISel skips entry block due to byval argument\n"; SuppressFastISel = true; break; } @@ -818,16 +741,29 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn, FastIS->startNewBlock(BB); // Do FastISel on as many instructions as possible. for (; BI != End; ++BI) { + if (MDDbgKind) { + // Update DebugLoc if debug information is attached with this + // instruction. + if (MDNode *Dbg = TheMetadata.getMD(MDDbgKind, BI)) { + DILocation DILoc(Dbg); + DebugLoc Loc = ExtractDebugLocation(DILoc, + MF.getDebugLocInfo()); + FastIS->setCurDebugLoc(Loc); + if (MF.getDefaultDebugLoc().isUnknown()) + MF.setDefaultDebugLoc(Loc); + } + } + // Just before the terminator instruction, insert instructions to // feed PHI nodes in successor blocks. if (isa<TerminatorInst>(BI)) if (!HandlePHINodesInSuccessorBlocksFast(LLVMBB, FastIS)) { if (EnableFastISelVerbose || EnableFastISelAbort) { - cerr << "FastISel miss: "; + errs() << "FastISel miss: "; BI->dump(); } - if (EnableFastISelAbort) - assert(0 && "FastISel didn't handle a PHI in a successor"); + assert(!EnableFastISelAbort && + "FastISel didn't handle a PHI in a successor"); break; } @@ -842,11 +778,11 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn, // Then handle certain instructions as single-LLVM-Instruction blocks. 
if (isa<CallInst>(BI)) { if (EnableFastISelVerbose || EnableFastISelAbort) { - cerr << "FastISel missed call: "; + errs() << "FastISel missed call: "; BI->dump(); } - if (BI->getType() != Type::VoidTy) { + if (BI->getType() != Type::getVoidTy(*CurDAG->getContext())) { unsigned &R = FuncInfo->ValueMap[BI]; if (!R) R = FuncInfo->CreateRegForValue(BI); @@ -864,13 +800,13 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn, // For now, be a little lenient about non-branch terminators. if (!isa<TerminatorInst>(BI) || isa<BranchInst>(BI)) { if (EnableFastISelVerbose || EnableFastISelAbort) { - cerr << "FastISel miss: "; + errs() << "FastISel miss: "; BI->dump(); } if (EnableFastISelAbort) // The "fast" selector couldn't handle something and bailed. // For the purpose of debugging, just abort. - assert(0 && "FastISel didn't select the entire block"); + llvm_unreachable("FastISel didn't select the entire block"); } break; } @@ -895,15 +831,16 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn, void SelectionDAGISel::FinishBasicBlock() { - DOUT << "Target-post-processed machine code:\n"; + DEBUG(errs() << "Target-post-processed machine code:\n"); DEBUG(BB->dump()); - DOUT << "Total amount of phi nodes to update: " - << SDL->PHINodesToUpdate.size() << "\n"; + DEBUG(errs() << "Total amount of phi nodes to update: " + << SDL->PHINodesToUpdate.size() << "\n"); DEBUG(for (unsigned i = 0, e = SDL->PHINodesToUpdate.size(); i != e; ++i) - DOUT << "Node " << i << " : (" << SDL->PHINodesToUpdate[i].first - << ", " << SDL->PHINodesToUpdate[i].second << ")\n";); - + errs() << "Node " << i << " : (" + << SDL->PHINodesToUpdate[i].first + << ", " << SDL->PHINodesToUpdate[i].second << ")\n"); + // Next, now that we know what the last MBB the LLVM BB expanded is, update // PHI nodes in successors. 
if (SDL->SwitchCases.empty() && @@ -932,7 +869,7 @@ SelectionDAGISel::FinishBasicBlock() { CurDAG->setRoot(SDL->getRoot()); CodeGenAndEmitDAG(); SDL->clear(); - } + } for (unsigned j = 0, ej = SDL->BitTestCases[i].Cases.size(); j != ej; ++j) { // Set the current basic block to the mbb we wish to insert the code into @@ -947,8 +884,8 @@ SelectionDAGISel::FinishBasicBlock() { SDL->visitBitTestCase(SDL->BitTestCases[i].Default, SDL->BitTestCases[i].Reg, SDL->BitTestCases[i].Cases[j]); - - + + CurDAG->setRoot(SDL->getRoot()); CodeGenAndEmitDAG(); SDL->clear(); @@ -1001,7 +938,7 @@ SelectionDAGISel::FinishBasicBlock() { CodeGenAndEmitDAG(); SDL->clear(); } - + // Set the current basic block to the mbb we wish to insert the code into BB = SDL->JTCases[i].second.MBB; SDL->setCurrentBasicBlock(BB); @@ -1010,7 +947,7 @@ SelectionDAGISel::FinishBasicBlock() { CurDAG->setRoot(SDL->getRoot()); CodeGenAndEmitDAG(); SDL->clear(); - + // Update PHI Nodes for (unsigned pi = 0, pe = SDL->PHINodesToUpdate.size(); pi != pe; ++pi) { MachineInstr *PHI = SDL->PHINodesToUpdate[pi].first; @@ -1019,20 +956,21 @@ SelectionDAGISel::FinishBasicBlock() { "This is not a machine PHI node that we are updating!"); // "default" BB. We can go there only from header BB. if (PHIBB == SDL->JTCases[i].second.Default) { - PHI->addOperand(MachineOperand::CreateReg(SDL->PHINodesToUpdate[pi].second, - false)); - PHI->addOperand(MachineOperand::CreateMBB(SDL->JTCases[i].first.HeaderBB)); + PHI->addOperand + (MachineOperand::CreateReg(SDL->PHINodesToUpdate[pi].second, false)); + PHI->addOperand + (MachineOperand::CreateMBB(SDL->JTCases[i].first.HeaderBB)); } // JT BB. 
Just iterate over successors here if (BB->succ_end() != std::find(BB->succ_begin(),BB->succ_end(), PHIBB)) { - PHI->addOperand(MachineOperand::CreateReg(SDL->PHINodesToUpdate[pi].second, - false)); + PHI->addOperand + (MachineOperand::CreateReg(SDL->PHINodesToUpdate[pi].second, false)); PHI->addOperand(MachineOperand::CreateMBB(BB)); } } } SDL->JTCases.clear(); - + // If the switch block involved a branch to one of the actual successors, we // need to update PHI nodes in that block. for (unsigned i = 0, e = SDL->PHINodesToUpdate.size(); i != e; ++i) { @@ -1045,25 +983,31 @@ SelectionDAGISel::FinishBasicBlock() { PHI->addOperand(MachineOperand::CreateMBB(BB)); } } - + // If we generated any switch lowering information, build and codegen any // additional DAGs necessary. for (unsigned i = 0, e = SDL->SwitchCases.size(); i != e; ++i) { // Set the current basic block to the mbb we wish to insert the code into - BB = SDL->SwitchCases[i].ThisBB; + MachineBasicBlock *ThisBB = BB = SDL->SwitchCases[i].ThisBB; SDL->setCurrentBasicBlock(BB); - + // Emit the code SDL->visitSwitchCase(SDL->SwitchCases[i]); CurDAG->setRoot(SDL->getRoot()); CodeGenAndEmitDAG(); - SDL->clear(); - + // Handle any PHI nodes in successors of this chunk, as if we were coming // from the original BB before switch expansion. Note that PHI nodes can // occur multiple times in PHINodesToUpdate. We have to be very careful to // handle them the right number of times. while ((BB = SDL->SwitchCases[i].TrueBB)) { // Handle LHS and RHS. + // If new BB's are created during scheduling, the edges may have been + // updated. That is, the edge from ThisBB to BB may have been split and + // BB's predecessor is now another block. 
+ DenseMap<MachineBasicBlock*, MachineBasicBlock*>::iterator EI = + SDL->EdgeMapping.find(BB); + if (EI != SDL->EdgeMapping.end()) + ThisBB = EI->second; for (MachineBasicBlock::iterator Phi = BB->begin(); Phi != BB->end() && Phi->getOpcode() == TargetInstrInfo::PHI; ++Phi){ // This value for this PHI node is recorded in PHINodesToUpdate, get it. @@ -1073,21 +1017,22 @@ SelectionDAGISel::FinishBasicBlock() { if (SDL->PHINodesToUpdate[pn].first == Phi) { Phi->addOperand(MachineOperand::CreateReg(SDL->PHINodesToUpdate[pn]. second, false)); - Phi->addOperand(MachineOperand::CreateMBB(SDL->SwitchCases[i].ThisBB)); + Phi->addOperand(MachineOperand::CreateMBB(ThisBB)); break; } } } - + // Don't process RHS if same block as LHS. if (BB == SDL->SwitchCases[i].FalseBB) SDL->SwitchCases[i].FalseBB = 0; - + // If we haven't handled the RHS, do so now. Otherwise, we're done. SDL->SwitchCases[i].TrueBB = SDL->SwitchCases[i].FalseBB; SDL->SwitchCases[i].FalseBB = 0; } assert(SDL->SwitchCases[i].TrueBB == 0 && SDL->SwitchCases[i].FalseBB == 0); + SDL->clear(); } SDL->SwitchCases.clear(); @@ -1101,12 +1046,12 @@ SelectionDAGISel::FinishBasicBlock() { /// ScheduleDAGSDNodes *SelectionDAGISel::CreateScheduler() { RegisterScheduler::FunctionPassCtor Ctor = RegisterScheduler::getDefault(); - + if (!Ctor) { Ctor = ISHeuristic; RegisterScheduler::setDefault(Ctor); } - + return Ctor(this, OptLevel); } @@ -1123,25 +1068,25 @@ ScheduleHazardRecognizer *SelectionDAGISel::CreateTargetHazardRecognizer() { /// the dag combiner simplified the 255, we still want to match. RHS is the /// actual value in the DAG on the RHS of an AND, and DesiredMaskS is the value /// specified in the .td file (e.g. 255). 
-bool SelectionDAGISel::CheckAndMask(SDValue LHS, ConstantSDNode *RHS, +bool SelectionDAGISel::CheckAndMask(SDValue LHS, ConstantSDNode *RHS, int64_t DesiredMaskS) const { const APInt &ActualMask = RHS->getAPIntValue(); const APInt &DesiredMask = APInt(LHS.getValueSizeInBits(), DesiredMaskS); - + // If the actual mask exactly matches, success! if (ActualMask == DesiredMask) return true; - + // If the actual AND mask is allowing unallowed bits, this doesn't match. if (ActualMask.intersects(~DesiredMask)) return false; - + // Otherwise, the DAG Combiner may have proven that the value coming in is // either already zero or is not demanded. Check for known zero input bits. APInt NeededMask = DesiredMask & ~ActualMask; if (CurDAG->MaskedValueIsZero(LHS, NeededMask)) return true; - + // TODO: check to see if missing bits are just not demanded. // Otherwise, this pattern doesn't match. @@ -1152,32 +1097,32 @@ bool SelectionDAGISel::CheckAndMask(SDValue LHS, ConstantSDNode *RHS, /// the dag combiner simplified the 255, we still want to match. RHS is the /// actual value in the DAG on the RHS of an OR, and DesiredMaskS is the value /// specified in the .td file (e.g. 255). -bool SelectionDAGISel::CheckOrMask(SDValue LHS, ConstantSDNode *RHS, +bool SelectionDAGISel::CheckOrMask(SDValue LHS, ConstantSDNode *RHS, int64_t DesiredMaskS) const { const APInt &ActualMask = RHS->getAPIntValue(); const APInt &DesiredMask = APInt(LHS.getValueSizeInBits(), DesiredMaskS); - + // If the actual mask exactly matches, success! if (ActualMask == DesiredMask) return true; - + // If the actual AND mask is allowing unallowed bits, this doesn't match. if (ActualMask.intersects(~DesiredMask)) return false; - + // Otherwise, the DAG Combiner may have proven that the value coming in is // either already zero or is not demanded. Check for known zero input bits. 
APInt NeededMask = DesiredMask & ~ActualMask; - + APInt KnownZero, KnownOne; CurDAG->ComputeMaskedBits(LHS, NeededMask, KnownZero, KnownOne); - + // If all the missing bits in the or are already known to be set, match! if ((NeededMask & KnownOne) == NeededMask) return true; - + // TODO: check to see if missing bits are just not demanded. - + // Otherwise, this pattern doesn't match. return false; } @@ -1196,7 +1141,7 @@ SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops) { unsigned i = 2, e = InOps.size(); if (InOps[e-1].getValueType() == MVT::Flag) --e; // Don't process a flag operand if it is here. - + while (i != e) { unsigned Flags = cast<ConstantSDNode>(InOps[i])->getZExtValue(); if ((Flags & 7) != 4 /*MEM*/) { @@ -1210,25 +1155,25 @@ SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops) { // Otherwise, this is a memory operand. Ask the target to select it. std::vector<SDValue> SelOps; if (SelectInlineAsmMemoryOperand(InOps[i+1], 'm', SelOps)) { - cerr << "Could not match memory address. Inline asm failure!\n"; - exit(1); + llvm_report_error("Could not match memory address. Inline asm" + " failure!"); } - + // Add this to the output node. - MVT IntPtrTy = CurDAG->getTargetLoweringInfo().getPointerTy(); + EVT IntPtrTy = TLI.getPointerTy(); Ops.push_back(CurDAG->getTargetConstant(4/*MEM*/ | (SelOps.size()<< 3), IntPtrTy)); Ops.insert(Ops.end(), SelOps.begin(), SelOps.end()); i += 2; } } - + // Add the flag input back if present. if (e != InOps.size()) Ops.push_back(InOps.back()); } -/// findFlagUse - Return use of MVT::Flag value produced by the specified +/// findFlagUse - Return use of EVT::Flag value produced by the specified /// SDNode. /// static SDNode *findFlagUse(SDNode *N) { @@ -1331,7 +1276,7 @@ bool SelectionDAGISel::IsLegalAndProfitableToFold(SDNode *N, SDNode *U, // Fold. But since Fold and FU are flagged together, this will create // a cycle in the scheduling graph. 
- MVT VT = Root->getValueType(Root->getNumValues()-1); + EVT VT = Root->getValueType(Root->getNumValues()-1); while (VT == MVT::Flag) { SDNode *FU = findFlagUse(Root); if (FU == NULL) diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp index 6fd5df2..ccc5e3c 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp @@ -44,7 +44,7 @@ namespace llvm { } static std::string getEdgeDestLabel(const void *Node, unsigned i) { - return ((const SDNode *) Node)->getValueType(i).getMVTString(); + return ((const SDNode *) Node)->getValueType(i).getEVTString(); } /// edgeTargetsEdgeSource - This method returns true if this outgoing edge @@ -84,7 +84,7 @@ namespace llvm { template<typename EdgeIter> static std::string getEdgeAttributes(const void *Node, EdgeIter EI) { SDValue Op = EI.getNode()->getOperand(EI.getOperand()); - MVT VT = Op.getValueType(); + EVT VT = Op.getValueType(); if (VT == MVT::Flag) return "color=red,style=bold"; else if (VT == MVT::Other) @@ -138,11 +138,11 @@ std::string DOTGraphTraits<SelectionDAG*>::getNodeLabel(const SDNode *Node, void SelectionDAG::viewGraph(const std::string &Title) { // This code is only for debugging! #ifndef NDEBUG - ViewGraph(this, "dag." + getMachineFunction().getFunction()->getName(), false, - Title); + ViewGraph(this, "dag." 
+ getMachineFunction().getFunction()->getNameStr(), + false, Title); #else - cerr << "SelectionDAG::viewGraph is only available in debug builds on " - << "systems with Graphviz or gv!\n"; + errs() << "SelectionDAG::viewGraph is only available in debug builds on " + << "systems with Graphviz or gv!\n"; #endif // NDEBUG } @@ -158,8 +158,8 @@ void SelectionDAG::clearGraphAttrs() { #ifndef NDEBUG NodeGraphAttrs.clear(); #else - cerr << "SelectionDAG::clearGraphAttrs is only available in debug builds" - << " on systems with Graphviz or gv!\n"; + errs() << "SelectionDAG::clearGraphAttrs is only available in debug builds" + << " on systems with Graphviz or gv!\n"; #endif } @@ -170,8 +170,8 @@ void SelectionDAG::setGraphAttrs(const SDNode *N, const char *Attrs) { #ifndef NDEBUG NodeGraphAttrs[N] = Attrs; #else - cerr << "SelectionDAG::setGraphAttrs is only available in debug builds" - << " on systems with Graphviz or gv!\n"; + errs() << "SelectionDAG::setGraphAttrs is only available in debug builds" + << " on systems with Graphviz or gv!\n"; #endif } @@ -188,8 +188,8 @@ const std::string SelectionDAG::getGraphAttrs(const SDNode *N) const { else return ""; #else - cerr << "SelectionDAG::getGraphAttrs is only available in debug builds" - << " on systems with Graphviz or gv!\n"; + errs() << "SelectionDAG::getGraphAttrs is only available in debug builds" + << " on systems with Graphviz or gv!\n"; return std::string(""); #endif } @@ -200,8 +200,8 @@ void SelectionDAG::setGraphColor(const SDNode *N, const char *Color) { #ifndef NDEBUG NodeGraphAttrs[N] = std::string("color=") + Color; #else - cerr << "SelectionDAG::setGraphColor is only available in debug builds" - << " on systems with Graphviz or gv!\n"; + errs() << "SelectionDAG::setGraphColor is only available in debug builds" + << " on systems with Graphviz or gv!\n"; #endif } @@ -216,7 +216,7 @@ bool SelectionDAG::setSubgraphColorHelper(SDNode *N, const char *Color, DenseSet if (level >= 20) { if (!printed) { printed = 
true; - DOUT << "setSubgraphColor hit max level\n"; + DEBUG(errs() << "setSubgraphColor hit max level\n"); } return true; } @@ -232,8 +232,8 @@ bool SelectionDAG::setSubgraphColorHelper(SDNode *N, const char *Color, DenseSet } } #else - cerr << "SelectionDAG::setSubgraphColor is only available in debug builds" - << " on systems with Graphviz or gv!\n"; + errs() << "SelectionDAG::setSubgraphColor is only available in debug builds" + << " on systems with Graphviz or gv!\n"; #endif return hit_limit; } @@ -255,8 +255,8 @@ void SelectionDAG::setSubgraphColor(SDNode *N, const char *Color) { } #else - cerr << "SelectionDAG::setSubgraphColor is only available in debug builds" - << " on systems with Graphviz or gv!\n"; + errs() << "SelectionDAG::setSubgraphColor is only available in debug builds" + << " on systems with Graphviz or gv!\n"; #endif } diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 83357e0..a2baee4 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -11,18 +11,20 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Target/TargetAsmInfo.h" #include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetSubtarget.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtarget.h" #include "llvm/GlobalVariable.h" #include "llvm/DerivedTypes.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" using namespace llvm; @@ -239,12 +241,23 @@ static void InitLibcallNames(const char **Names) { Names[RTLIB::UO_F64] = "__unorddf2"; Names[RTLIB::O_F32] = 
"__unordsf2"; Names[RTLIB::O_F64] = "__unorddf2"; + Names[RTLIB::MEMCPY] = "memcpy"; + Names[RTLIB::MEMMOVE] = "memmove"; + Names[RTLIB::MEMSET] = "memset"; Names[RTLIB::UNWIND_RESUME] = "_Unwind_Resume"; } +/// InitLibcallCallingConvs - Set default libcall CallingConvs. +/// +static void InitLibcallCallingConvs(CallingConv::ID *CCs) { + for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i) { + CCs[i] = CallingConv::C; + } +} + /// getFPEXT - Return the FPEXT_*_* value for the given types, or /// UNKNOWN_LIBCALL if there is none. -RTLIB::Libcall RTLIB::getFPEXT(MVT OpVT, MVT RetVT) { +RTLIB::Libcall RTLIB::getFPEXT(EVT OpVT, EVT RetVT) { if (OpVT == MVT::f32) { if (RetVT == MVT::f64) return FPEXT_F32_F64; @@ -254,7 +267,7 @@ RTLIB::Libcall RTLIB::getFPEXT(MVT OpVT, MVT RetVT) { /// getFPROUND - Return the FPROUND_*_* value for the given types, or /// UNKNOWN_LIBCALL if there is none. -RTLIB::Libcall RTLIB::getFPROUND(MVT OpVT, MVT RetVT) { +RTLIB::Libcall RTLIB::getFPROUND(EVT OpVT, EVT RetVT) { if (RetVT == MVT::f32) { if (OpVT == MVT::f64) return FPROUND_F64_F32; @@ -273,7 +286,7 @@ RTLIB::Libcall RTLIB::getFPROUND(MVT OpVT, MVT RetVT) { /// getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or /// UNKNOWN_LIBCALL if there is none. -RTLIB::Libcall RTLIB::getFPTOSINT(MVT OpVT, MVT RetVT) { +RTLIB::Libcall RTLIB::getFPTOSINT(EVT OpVT, EVT RetVT) { if (OpVT == MVT::f32) { if (RetVT == MVT::i8) return FPTOSINT_F32_I8; @@ -312,7 +325,7 @@ RTLIB::Libcall RTLIB::getFPTOSINT(MVT OpVT, MVT RetVT) { /// getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or /// UNKNOWN_LIBCALL if there is none. 
-RTLIB::Libcall RTLIB::getFPTOUINT(MVT OpVT, MVT RetVT) { +RTLIB::Libcall RTLIB::getFPTOUINT(EVT OpVT, EVT RetVT) { if (OpVT == MVT::f32) { if (RetVT == MVT::i8) return FPTOUINT_F32_I8; @@ -351,7 +364,7 @@ RTLIB::Libcall RTLIB::getFPTOUINT(MVT OpVT, MVT RetVT) { /// getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or /// UNKNOWN_LIBCALL if there is none. -RTLIB::Libcall RTLIB::getSINTTOFP(MVT OpVT, MVT RetVT) { +RTLIB::Libcall RTLIB::getSINTTOFP(EVT OpVT, EVT RetVT) { if (OpVT == MVT::i32) { if (RetVT == MVT::f32) return SINTTOFP_I32_F32; @@ -385,7 +398,7 @@ RTLIB::Libcall RTLIB::getSINTTOFP(MVT OpVT, MVT RetVT) { /// getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or /// UNKNOWN_LIBCALL if there is none. -RTLIB::Libcall RTLIB::getUINTTOFP(MVT OpVT, MVT RetVT) { +RTLIB::Libcall RTLIB::getUINTTOFP(EVT OpVT, EVT RetVT) { if (OpVT == MVT::i32) { if (RetVT == MVT::f32) return UINTTOFP_I32_F32; @@ -439,8 +452,9 @@ static void InitCmpLibcallCCs(ISD::CondCode *CCs) { CCs[RTLIB::O_F64] = ISD::SETEQ; } -TargetLowering::TargetLowering(TargetMachine &tm) - : TM(tm), TD(TM.getTargetData()) { +/// NOTE: The constructor takes ownership of TLOF. +TargetLowering::TargetLowering(TargetMachine &tm,TargetLoweringObjectFile *tlof) + : TM(tm), TD(TM.getTargetData()), TLOF(*tlof) { // All operations default to being supported. 
memset(OpActions, 0, sizeof(OpActions)); memset(LoadExtActions, 0, sizeof(LoadExtActions)); @@ -490,12 +504,10 @@ TargetLowering::TargetLowering(TargetMachine &tm) IsLittleEndian = TD->isLittleEndian(); UsesGlobalOffsetTable = false; - ShiftAmountTy = PointerTy = getValueType(TD->getIntPtrType()); - ShiftAmtHandling = Undefined; + ShiftAmountTy = PointerTy = MVT::getIntegerVT(8*TD->getPointerSize()); memset(RegClassForVT, 0,MVT::LAST_VALUETYPE*sizeof(TargetRegisterClass*)); memset(TargetDAGCombineArray, 0, array_lengthof(TargetDAGCombineArray)); maxStoresPerMemset = maxStoresPerMemcpy = maxStoresPerMemmove = 8; - allowUnalignedMemoryAccesses = false; benefitFromCodePlacementOpt = false; UseUnderscoreSetJmp = false; UseUnderscoreLongJmp = false; @@ -515,14 +527,62 @@ TargetLowering::TargetLowering(TargetMachine &tm) InitLibcallNames(LibcallRoutineNames); InitCmpLibcallCCs(CmpLibcallCCs); + InitLibcallCallingConvs(LibcallCallingConvs); // Tell Legalize whether the assembler supports DEBUG_LOC. - const TargetAsmInfo *TASM = TM.getTargetAsmInfo(); + const MCAsmInfo *TASM = TM.getMCAsmInfo(); if (!TASM || !TASM->hasDotLocAndDotFile()) setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand); } -TargetLowering::~TargetLowering() {} +TargetLowering::~TargetLowering() { + delete &TLOF; +} + +static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT, + unsigned &NumIntermediates, + EVT &RegisterVT, + TargetLowering* TLI) { + // Figure out the right, legal destination reg to copy into. + unsigned NumElts = VT.getVectorNumElements(); + MVT EltTy = VT.getVectorElementType(); + + unsigned NumVectorRegs = 1; + + // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we + // could break down into LHS/RHS like LegalizeDAG does. + if (!isPowerOf2_32(NumElts)) { + NumVectorRegs = NumElts; + NumElts = 1; + } + + // Divide the input until we get to a supported size. This will always + // end with a scalar if the target doesn't support vectors. 
+ while (NumElts > 1 && !TLI->isTypeLegal(MVT::getVectorVT(EltTy, NumElts))) { + NumElts >>= 1; + NumVectorRegs <<= 1; + } + + NumIntermediates = NumVectorRegs; + + MVT NewVT = MVT::getVectorVT(EltTy, NumElts); + if (!TLI->isTypeLegal(NewVT)) + NewVT = EltTy; + IntermediateVT = NewVT; + + EVT DestVT = TLI->getRegisterType(NewVT); + RegisterVT = DestVT; + if (EVT(DestVT).bitsLT(NewVT)) { + // Value is expanded, e.g. i64 -> i16. + return NumVectorRegs*(NewVT.getSizeInBits()/DestVT.getSizeInBits()); + } else { + // Otherwise, promotion or legal types use the same number of registers as + // the vector decimated to the appropriate level. + return NumVectorRegs; + } + + return 1; +} /// computeRegisterProperties - Once all of the register classes are added, /// this allows us to compute derived properties we expose. @@ -546,13 +606,13 @@ void TargetLowering::computeRegisterProperties() { // Every integer value type larger than this largest register takes twice as // many registers to represent as the previous ValueType. 
for (unsigned ExpandedReg = LargestIntReg + 1; ; ++ExpandedReg) { - MVT EVT = (MVT::SimpleValueType)ExpandedReg; - if (!EVT.isInteger()) + EVT ExpandedVT = (MVT::SimpleValueType)ExpandedReg; + if (!ExpandedVT.isInteger()) break; NumRegistersForVT[ExpandedReg] = 2*NumRegistersForVT[ExpandedReg-1]; RegisterTypeForVT[ExpandedReg] = (MVT::SimpleValueType)LargestIntReg; TransformToType[ExpandedReg] = (MVT::SimpleValueType)(ExpandedReg - 1); - ValueTypeActions.setTypeAction(EVT, Expand); + ValueTypeActions.setTypeAction(ExpandedVT, Expand); } // Inspect all of the ValueType's smaller than the largest integer @@ -560,7 +620,7 @@ void TargetLowering::computeRegisterProperties() { unsigned LegalIntReg = LargestIntReg; for (unsigned IntReg = LargestIntReg - 1; IntReg >= (unsigned)MVT::i1; --IntReg) { - MVT IVT = (MVT::SimpleValueType)IntReg; + EVT IVT = (MVT::SimpleValueType)IntReg; if (isTypeLegal(IVT)) { LegalIntReg = IntReg; } else { @@ -608,20 +668,20 @@ void TargetLowering::computeRegisterProperties() { i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) { MVT VT = (MVT::SimpleValueType)i; if (!isTypeLegal(VT)) { - MVT IntermediateVT, RegisterVT; + MVT IntermediateVT; + EVT RegisterVT; unsigned NumIntermediates; NumRegistersForVT[i] = - getVectorTypeBreakdown(VT, - IntermediateVT, NumIntermediates, - RegisterVT); + getVectorTypeBreakdownMVT(VT, IntermediateVT, NumIntermediates, + RegisterVT, this); RegisterTypeForVT[i] = RegisterVT; // Determine if there is a legal wider type. 
bool IsLegalWiderType = false; - MVT EltVT = VT.getVectorElementType(); + EVT EltVT = VT.getVectorElementType(); unsigned NElts = VT.getVectorNumElements(); for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) { - MVT SVT = (MVT::SimpleValueType)nVT; + EVT SVT = (MVT::SimpleValueType)nVT; if (isTypeLegal(SVT) && SVT.getVectorElementType() == EltVT && SVT.getVectorNumElements() > NElts) { TransformToType[i] = SVT; @@ -631,7 +691,7 @@ void TargetLowering::computeRegisterProperties() { } } if (!IsLegalWiderType) { - MVT NVT = VT.getPow2VectorType(); + EVT NVT = VT.getPow2VectorType(); if (NVT == VT) { // Type is already a power of 2. The default action is to split. TransformToType[i] = MVT::Other; @@ -650,11 +710,10 @@ const char *TargetLowering::getTargetNodeName(unsigned Opcode) const { } -MVT TargetLowering::getSetCCResultType(MVT VT) const { - return getValueType(TD->getIntPtrType()); +MVT::SimpleValueType TargetLowering::getSetCCResultType(EVT VT) const { + return PointerTy.SimpleTy; } - /// getVectorTypeBreakdown - Vector types are broken down into some number of /// legal first class types. For example, MVT::v8f32 maps to 2 MVT::v4f32 /// with Altivec or SSE1, or 8 promoted MVT::f64 values with the X86 FP stack. @@ -664,13 +723,13 @@ MVT TargetLowering::getSetCCResultType(MVT VT) const { /// register. It also returns the VT and quantity of the intermediate values /// before they are promoted/expanded. /// -unsigned TargetLowering::getVectorTypeBreakdown(MVT VT, - MVT &IntermediateVT, +unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT, + EVT &IntermediateVT, unsigned &NumIntermediates, - MVT &RegisterVT) const { + EVT &RegisterVT) const { // Figure out the right, legal destination reg to copy into. 
unsigned NumElts = VT.getVectorNumElements(); - MVT EltTy = VT.getVectorElementType(); + EVT EltTy = VT.getVectorElementType(); unsigned NumVectorRegs = 1; @@ -683,19 +742,20 @@ unsigned TargetLowering::getVectorTypeBreakdown(MVT VT, // Divide the input until we get to a supported size. This will always // end with a scalar if the target doesn't support vectors. - while (NumElts > 1 && !isTypeLegal(MVT::getVectorVT(EltTy, NumElts))) { + while (NumElts > 1 && !isTypeLegal( + EVT::getVectorVT(Context, EltTy, NumElts))) { NumElts >>= 1; NumVectorRegs <<= 1; } NumIntermediates = NumVectorRegs; - MVT NewVT = MVT::getVectorVT(EltTy, NumElts); + EVT NewVT = EVT::getVectorVT(Context, EltTy, NumElts); if (!isTypeLegal(NewVT)) NewVT = EltTy; IntermediateVT = NewVT; - MVT DestVT = getRegisterType(NewVT); + EVT DestVT = getRegisterType(Context, NewVT); RegisterVT = DestVT; if (DestVT.bitsLT(NewVT)) { // Value is expanded, e.g. i64 -> i16. @@ -714,7 +774,7 @@ unsigned TargetLowering::getVectorTypeBreakdown(MVT VT, /// If there is no vector type that we want to widen to, returns MVT::Other /// When and where to widen is target dependent based on the cost of /// scalarizing vs using the wider vector type. 
-MVT TargetLowering::getWidenVectorType(MVT VT) const { +EVT TargetLowering::getWidenVectorType(EVT VT) const { assert(VT.isVector()); if (isTypeLegal(VT)) return VT; @@ -781,7 +841,7 @@ bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDValue Op, // if we can expand it to have all bits set, do it if (C->getAPIntValue().intersects(~Demanded)) { - MVT VT = Op.getValueType(); + EVT VT = Op.getValueType(); SDValue New = DAG.getNode(Op.getOpcode(), dl, VT, Op.getOperand(0), DAG.getConstant(Demanded & C->getAPIntValue(), @@ -822,7 +882,7 @@ TargetLowering::TargetLoweringOpt::ShrinkDemandedOp(SDValue Op, if (!isPowerOf2_32(SmallVTBits)) SmallVTBits = NextPowerOf2(SmallVTBits); for (; SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) { - MVT SmallVT = MVT::getIntegerVT(SmallVTBits); + EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits); if (TLI.isTruncateFree(Op.getValueType(), SmallVT) && TLI.isZExtFree(SmallVT, Op.getValueType())) { // We found a type with free casts. @@ -1008,7 +1068,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // e.g. 
(X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2 if ((NewMask & (KnownZero|KnownOne)) == NewMask) { // all known if ((KnownOne & KnownOne2) == KnownOne) { - MVT VT = Op.getValueType(); + EVT VT = Op.getValueType(); SDValue ANDC = TLO.DAG.getConstant(~KnownOne & NewMask, VT); return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), ANDC)); @@ -1023,7 +1083,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // if we can expand it to have all bits set, do it if (Expanded.isAllOnesValue()) { if (Expanded != C->getAPIntValue()) { - MVT VT = Op.getValueType(); + EVT VT = Op.getValueType(); SDValue New = TLO.DAG.getNode(Op.getOpcode(), dl,VT, Op.getOperand(0), TLO.DAG.getConstant(Expanded, VT)); return TLO.CombineTo(Op, New); @@ -1099,7 +1159,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, SDValue NewSA = TLO.DAG.getConstant(Diff, Op.getOperand(1).getValueType()); - MVT VT = Op.getValueType(); + EVT VT = Op.getValueType(); return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, InOp.getOperand(0), NewSA)); } @@ -1116,7 +1176,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, break; case ISD::SRL: if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { - MVT VT = Op.getValueType(); + EVT VT = Op.getValueType(); unsigned ShAmt = SA->getZExtValue(); unsigned VTSize = VT.getSizeInBits(); SDValue InOp = Op.getOperand(0); @@ -1168,7 +1228,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, Op.getOperand(0), Op.getOperand(1))); if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { - MVT VT = Op.getValueType(); + EVT VT = Op.getValueType(); unsigned ShAmt = SA->getZExtValue(); // If the shift count is an invalid immediate, don't do anything. @@ -1205,7 +1265,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, } break; case ISD::SIGN_EXTEND_INREG: { - MVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); + EVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); // Sign extension. 
Compute the demanded bits in the result that are not // present in the input. @@ -1272,7 +1332,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, break; } case ISD::SIGN_EXTEND: { - MVT InVT = Op.getOperand(0).getValueType(); + EVT InVT = Op.getOperand(0).getValueType(); unsigned InBits = InVT.getSizeInBits(); APInt InMask = APInt::getLowBitsSet(BitWidth, InBits); APInt InSignBit = APInt::getBitsSet(BitWidth, InBits - 1, InBits); @@ -1371,7 +1431,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, break; } case ISD::AssertZext: { - MVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT(); + EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT(); APInt InMask = APInt::getLowBitsSet(BitWidth, VT.getSizeInBits()); if (SimplifyDemandedBits(Op.getOperand(0), InMask & NewMask, @@ -1385,7 +1445,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, #if 0 // If this is an FP->Int bitcast and if the sign bit is the only thing that // is demanded, turn this into a FGETSIGN. - if (NewMask == MVT::getIntegerVTSignBit(Op.getValueType()) && + if (NewMask == EVT::getIntegerVTSignBit(Op.getValueType()) && MVT::isFloatingPoint(Op.getOperand(0).getValueType()) && !MVT::isVector(Op.getOperand(0).getValueType())) { // Only do this xform if FGETSIGN is valid or if before legalize. @@ -1492,7 +1552,7 @@ static bool ValueHasExactlyOneBitSet(SDValue Val, const SelectionDAG &DAG) { // to handle some common cases. // Fall back to ComputeMaskedBits to catch other known cases. - MVT OpVT = Val.getValueType(); + EVT OpVT = Val.getValueType(); unsigned BitWidth = OpVT.getSizeInBits(); APInt Mask = APInt::getAllOnesValue(BitWidth); APInt KnownZero, KnownOne; @@ -1504,10 +1564,11 @@ static bool ValueHasExactlyOneBitSet(SDValue Val, const SelectionDAG &DAG) { /// SimplifySetCC - Try to simplify a setcc built with the specified operands /// and cc. If it is unable to simplify it, return a null SDValue. 
SDValue -TargetLowering::SimplifySetCC(MVT VT, SDValue N0, SDValue N1, +TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, bool foldBooleans, DAGCombinerInfo &DCI, DebugLoc dl) const { SelectionDAG &DAG = DCI.DAG; + LLVMContext &Context = *DAG.getContext(); // These setcc operations always fold. switch (Cond) { @@ -1518,316 +1579,321 @@ TargetLowering::SimplifySetCC(MVT VT, SDValue N0, SDValue N1, case ISD::SETTRUE2: return DAG.getConstant(1, VT); } + if (isa<ConstantSDNode>(N0.getNode())) { + // Ensure that the constant occurs on the RHS, and fold constant + // comparisons. + return DAG.getSetCC(dl, VT, N1, N0, ISD::getSetCCSwappedOperands(Cond)); + } + if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) { const APInt &C1 = N1C->getAPIntValue(); - if (isa<ConstantSDNode>(N0.getNode())) { - return DAG.FoldSetCC(VT, N0, N1, Cond, dl); - } else { - // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an - // equality comparison, then we're just comparing whether X itself is - // zero. - if (N0.getOpcode() == ISD::SRL && (C1 == 0 || C1 == 1) && - N0.getOperand(0).getOpcode() == ISD::CTLZ && - N0.getOperand(1).getOpcode() == ISD::Constant) { - unsigned ShAmt = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue(); - if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && - ShAmt == Log2_32(N0.getValueType().getSizeInBits())) { - if ((C1 == 0) == (Cond == ISD::SETEQ)) { - // (srl (ctlz x), 5) == 0 -> X != 0 - // (srl (ctlz x), 5) != 1 -> X != 0 - Cond = ISD::SETNE; - } else { - // (srl (ctlz x), 5) != 0 -> X == 0 - // (srl (ctlz x), 5) == 1 -> X == 0 - Cond = ISD::SETEQ; - } - SDValue Zero = DAG.getConstant(0, N0.getValueType()); - return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0), - Zero, Cond); + + // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an + // equality comparison, then we're just comparing whether X itself is + // zero. 
+ if (N0.getOpcode() == ISD::SRL && (C1 == 0 || C1 == 1) && + N0.getOperand(0).getOpcode() == ISD::CTLZ && + N0.getOperand(1).getOpcode() == ISD::Constant) { + unsigned ShAmt = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue(); + if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && + ShAmt == Log2_32(N0.getValueType().getSizeInBits())) { + if ((C1 == 0) == (Cond == ISD::SETEQ)) { + // (srl (ctlz x), 5) == 0 -> X != 0 + // (srl (ctlz x), 5) != 1 -> X != 0 + Cond = ISD::SETNE; + } else { + // (srl (ctlz x), 5) != 0 -> X == 0 + // (srl (ctlz x), 5) == 1 -> X == 0 + Cond = ISD::SETEQ; } + SDValue Zero = DAG.getConstant(0, N0.getValueType()); + return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0), + Zero, Cond); } + } - // If the LHS is '(and load, const)', the RHS is 0, - // the test is for equality or unsigned, and all 1 bits of the const are - // in the same partial word, see if we can shorten the load. - if (DCI.isBeforeLegalize() && - N0.getOpcode() == ISD::AND && C1 == 0 && - N0.getNode()->hasOneUse() && - isa<LoadSDNode>(N0.getOperand(0)) && - N0.getOperand(0).getNode()->hasOneUse() && - isa<ConstantSDNode>(N0.getOperand(1))) { - LoadSDNode *Lod = cast<LoadSDNode>(N0.getOperand(0)); - uint64_t bestMask = 0; - unsigned bestWidth = 0, bestOffset = 0; - if (!Lod->isVolatile() && Lod->isUnindexed() && - // FIXME: This uses getZExtValue() below so it only works on i64 and - // below. - N0.getValueType().getSizeInBits() <= 64) { - unsigned origWidth = N0.getValueType().getSizeInBits(); - // We can narrow (e.g.) 16-bit extending loads on 32-bit target to - // 8 bits, but have to be careful... 
- if (Lod->getExtensionType() != ISD::NON_EXTLOAD) - origWidth = Lod->getMemoryVT().getSizeInBits(); - uint64_t Mask =cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue(); - for (unsigned width = origWidth / 2; width>=8; width /= 2) { - uint64_t newMask = (1ULL << width) - 1; - for (unsigned offset=0; offset<origWidth/width; offset++) { - if ((newMask & Mask) == Mask) { - if (!TD->isLittleEndian()) - bestOffset = (origWidth/width - offset - 1) * (width/8); - else - bestOffset = (uint64_t)offset * (width/8); - bestMask = Mask >> (offset * (width/8) * 8); - bestWidth = width; - break; - } - newMask = newMask << width; + // If the LHS is '(and load, const)', the RHS is 0, + // the test is for equality or unsigned, and all 1 bits of the const are + // in the same partial word, see if we can shorten the load. + if (DCI.isBeforeLegalize() && + N0.getOpcode() == ISD::AND && C1 == 0 && + N0.getNode()->hasOneUse() && + isa<LoadSDNode>(N0.getOperand(0)) && + N0.getOperand(0).getNode()->hasOneUse() && + isa<ConstantSDNode>(N0.getOperand(1))) { + LoadSDNode *Lod = cast<LoadSDNode>(N0.getOperand(0)); + uint64_t bestMask = 0; + unsigned bestWidth = 0, bestOffset = 0; + if (!Lod->isVolatile() && Lod->isUnindexed() && + // FIXME: This uses getZExtValue() below so it only works on i64 and + // below. + N0.getValueType().getSizeInBits() <= 64) { + unsigned origWidth = N0.getValueType().getSizeInBits(); + // We can narrow (e.g.) 16-bit extending loads on 32-bit target to + // 8 bits, but have to be careful... 
+ if (Lod->getExtensionType() != ISD::NON_EXTLOAD) + origWidth = Lod->getMemoryVT().getSizeInBits(); + uint64_t Mask =cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue(); + for (unsigned width = origWidth / 2; width>=8; width /= 2) { + uint64_t newMask = (1ULL << width) - 1; + for (unsigned offset=0; offset<origWidth/width; offset++) { + if ((newMask & Mask) == Mask) { + if (!TD->isLittleEndian()) + bestOffset = (origWidth/width - offset - 1) * (width/8); + else + bestOffset = (uint64_t)offset * (width/8); + bestMask = Mask >> (offset * (width/8) * 8); + bestWidth = width; + break; } + newMask = newMask << width; } } - if (bestWidth) { - MVT newVT = MVT::getIntegerVT(bestWidth); - if (newVT.isRound()) { - MVT PtrType = Lod->getOperand(1).getValueType(); - SDValue Ptr = Lod->getBasePtr(); - if (bestOffset != 0) - Ptr = DAG.getNode(ISD::ADD, dl, PtrType, Lod->getBasePtr(), - DAG.getConstant(bestOffset, PtrType)); - unsigned NewAlign = MinAlign(Lod->getAlignment(), bestOffset); - SDValue NewLoad = DAG.getLoad(newVT, dl, Lod->getChain(), Ptr, - Lod->getSrcValue(), - Lod->getSrcValueOffset() + bestOffset, - false, NewAlign); - return DAG.getSetCC(dl, VT, - DAG.getNode(ISD::AND, dl, newVT, NewLoad, - DAG.getConstant(bestMask, newVT)), - DAG.getConstant(0LL, newVT), Cond); - } + } + if (bestWidth) { + EVT newVT = EVT::getIntegerVT(Context, bestWidth); + if (newVT.isRound()) { + EVT PtrType = Lod->getOperand(1).getValueType(); + SDValue Ptr = Lod->getBasePtr(); + if (bestOffset != 0) + Ptr = DAG.getNode(ISD::ADD, dl, PtrType, Lod->getBasePtr(), + DAG.getConstant(bestOffset, PtrType)); + unsigned NewAlign = MinAlign(Lod->getAlignment(), bestOffset); + SDValue NewLoad = DAG.getLoad(newVT, dl, Lod->getChain(), Ptr, + Lod->getSrcValue(), + Lod->getSrcValueOffset() + bestOffset, + false, NewAlign); + return DAG.getSetCC(dl, VT, + DAG.getNode(ISD::AND, dl, newVT, NewLoad, + DAG.getConstant(bestMask, newVT)), + DAG.getConstant(0LL, newVT), Cond); } } + } - // If the LHS is a 
ZERO_EXTEND, perform the comparison on the input. - if (N0.getOpcode() == ISD::ZERO_EXTEND) { - unsigned InSize = N0.getOperand(0).getValueType().getSizeInBits(); - - // If the comparison constant has bits in the upper part, the - // zero-extended value could never match. - if (C1.intersects(APInt::getHighBitsSet(C1.getBitWidth(), - C1.getBitWidth() - InSize))) { - switch (Cond) { - case ISD::SETUGT: - case ISD::SETUGE: - case ISD::SETEQ: return DAG.getConstant(0, VT); - case ISD::SETULT: - case ISD::SETULE: - case ISD::SETNE: return DAG.getConstant(1, VT); - case ISD::SETGT: - case ISD::SETGE: - // True if the sign bit of C1 is set. - return DAG.getConstant(C1.isNegative(), VT); - case ISD::SETLT: - case ISD::SETLE: - // True if the sign bit of C1 isn't set. - return DAG.getConstant(C1.isNonNegative(), VT); - default: - break; - } - } + // If the LHS is a ZERO_EXTEND, perform the comparison on the input. + if (N0.getOpcode() == ISD::ZERO_EXTEND) { + unsigned InSize = N0.getOperand(0).getValueType().getSizeInBits(); - // Otherwise, we can perform the comparison with the low bits. + // If the comparison constant has bits in the upper part, the + // zero-extended value could never match. + if (C1.intersects(APInt::getHighBitsSet(C1.getBitWidth(), + C1.getBitWidth() - InSize))) { switch (Cond) { - case ISD::SETEQ: - case ISD::SETNE: case ISD::SETUGT: case ISD::SETUGE: + case ISD::SETEQ: return DAG.getConstant(0, VT); case ISD::SETULT: case ISD::SETULE: - return DAG.getSetCC(dl, VT, N0.getOperand(0), - DAG.getConstant(APInt(C1).trunc(InSize), - N0.getOperand(0).getValueType()), - Cond); + case ISD::SETNE: return DAG.getConstant(1, VT); + case ISD::SETGT: + case ISD::SETGE: + // True if the sign bit of C1 is set. + return DAG.getConstant(C1.isNegative(), VT); + case ISD::SETLT: + case ISD::SETLE: + // True if the sign bit of C1 isn't set. 
+ return DAG.getConstant(C1.isNonNegative(), VT); default: - break; // todo, be more careful with signed comparisons - } - } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG && - (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { - MVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT(); - unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits(); - MVT ExtDstTy = N0.getValueType(); - unsigned ExtDstTyBits = ExtDstTy.getSizeInBits(); - - // If the extended part has any inconsistent bits, it cannot ever - // compare equal. In other words, they have to be all ones or all - // zeros. - APInt ExtBits = - APInt::getHighBitsSet(ExtDstTyBits, ExtDstTyBits - ExtSrcTyBits); - if ((C1 & ExtBits) != 0 && (C1 & ExtBits) != ExtBits) - return DAG.getConstant(Cond == ISD::SETNE, VT); - - SDValue ZextOp; - MVT Op0Ty = N0.getOperand(0).getValueType(); - if (Op0Ty == ExtSrcTy) { - ZextOp = N0.getOperand(0); - } else { - APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits); - ZextOp = DAG.getNode(ISD::AND, dl, Op0Ty, N0.getOperand(0), - DAG.getConstant(Imm, Op0Ty)); - } - if (!DCI.isCalledByLegalizer()) - DCI.AddToWorklist(ZextOp.getNode()); - // Otherwise, make this a use of a zext. - return DAG.getSetCC(dl, VT, ZextOp, - DAG.getConstant(C1 & APInt::getLowBitsSet( - ExtDstTyBits, - ExtSrcTyBits), - ExtDstTy), - Cond); - } else if ((N1C->isNullValue() || N1C->getAPIntValue() == 1) && - (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { - - // SETCC (SETCC), [0|1], [EQ|NE] -> SETCC - if (N0.getOpcode() == ISD::SETCC) { - bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (N1C->getZExtValue() != 1); - if (TrueWhenTrue) - return N0; - - // Invert the condition. 
- ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); - CC = ISD::getSetCCInverse(CC, - N0.getOperand(0).getValueType().isInteger()); - return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC); - } - - if ((N0.getOpcode() == ISD::XOR || - (N0.getOpcode() == ISD::AND && - N0.getOperand(0).getOpcode() == ISD::XOR && - N0.getOperand(1) == N0.getOperand(0).getOperand(1))) && - isa<ConstantSDNode>(N0.getOperand(1)) && - cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue() == 1) { - // If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We - // can only do this if the top bits are known zero. - unsigned BitWidth = N0.getValueSizeInBits(); - if (DAG.MaskedValueIsZero(N0, - APInt::getHighBitsSet(BitWidth, - BitWidth-1))) { - // Okay, get the un-inverted input value. - SDValue Val; - if (N0.getOpcode() == ISD::XOR) - Val = N0.getOperand(0); - else { - assert(N0.getOpcode() == ISD::AND && - N0.getOperand(0).getOpcode() == ISD::XOR); - // ((X^1)&1)^1 -> X & 1 - Val = DAG.getNode(ISD::AND, dl, N0.getValueType(), - N0.getOperand(0).getOperand(0), - N0.getOperand(1)); - } - return DAG.getSetCC(dl, VT, Val, N1, - Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ); - } + break; } } + + // Otherwise, we can perform the comparison with the low bits. 
+ switch (Cond) { + case ISD::SETEQ: + case ISD::SETNE: + case ISD::SETUGT: + case ISD::SETUGE: + case ISD::SETULT: + case ISD::SETULE: { + EVT newVT = N0.getOperand(0).getValueType(); + if (DCI.isBeforeLegalizeOps() || + (isOperationLegal(ISD::SETCC, newVT) && + getCondCodeAction(Cond, newVT)==Legal)) + return DAG.getSetCC(dl, VT, N0.getOperand(0), + DAG.getConstant(APInt(C1).trunc(InSize), newVT), + Cond); + break; + } + default: + break; // todo, be more careful with signed comparisons + } + } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG && + (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { + EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT(); + unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits(); + EVT ExtDstTy = N0.getValueType(); + unsigned ExtDstTyBits = ExtDstTy.getSizeInBits(); + + // If the extended part has any inconsistent bits, it cannot ever + // compare equal. In other words, they have to be all ones or all + // zeros. + APInt ExtBits = + APInt::getHighBitsSet(ExtDstTyBits, ExtDstTyBits - ExtSrcTyBits); + if ((C1 & ExtBits) != 0 && (C1 & ExtBits) != ExtBits) + return DAG.getConstant(Cond == ISD::SETNE, VT); - APInt MinVal, MaxVal; - unsigned OperandBitSize = N1C->getValueType(0).getSizeInBits(); - if (ISD::isSignedIntSetCC(Cond)) { - MinVal = APInt::getSignedMinValue(OperandBitSize); - MaxVal = APInt::getSignedMaxValue(OperandBitSize); + SDValue ZextOp; + EVT Op0Ty = N0.getOperand(0).getValueType(); + if (Op0Ty == ExtSrcTy) { + ZextOp = N0.getOperand(0); } else { - MinVal = APInt::getMinValue(OperandBitSize); - MaxVal = APInt::getMaxValue(OperandBitSize); + APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits); + ZextOp = DAG.getNode(ISD::AND, dl, Op0Ty, N0.getOperand(0), + DAG.getConstant(Imm, Op0Ty)); } - - // Canonicalize GE/LE comparisons to use GT/LT comparisons. 
- if (Cond == ISD::SETGE || Cond == ISD::SETUGE) { - if (C1 == MinVal) return DAG.getConstant(1, VT); // X >= MIN --> true - // X >= C0 --> X > (C0-1) - return DAG.getSetCC(dl, VT, N0, - DAG.getConstant(C1-1, N1.getValueType()), - (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT); + if (!DCI.isCalledByLegalizer()) + DCI.AddToWorklist(ZextOp.getNode()); + // Otherwise, make this a use of a zext. + return DAG.getSetCC(dl, VT, ZextOp, + DAG.getConstant(C1 & APInt::getLowBitsSet( + ExtDstTyBits, + ExtSrcTyBits), + ExtDstTy), + Cond); + } else if ((N1C->isNullValue() || N1C->getAPIntValue() == 1) && + (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { + + // SETCC (SETCC), [0|1], [EQ|NE] -> SETCC + if (N0.getOpcode() == ISD::SETCC) { + bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (N1C->getZExtValue() != 1); + if (TrueWhenTrue) + return N0; + + // Invert the condition. + ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); + CC = ISD::getSetCCInverse(CC, + N0.getOperand(0).getValueType().isInteger()); + return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC); } - - if (Cond == ISD::SETLE || Cond == ISD::SETULE) { - if (C1 == MaxVal) return DAG.getConstant(1, VT); // X <= MAX --> true - // X <= C0 --> X < (C0+1) - return DAG.getSetCC(dl, VT, N0, - DAG.getConstant(C1+1, N1.getValueType()), - (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT); + + if ((N0.getOpcode() == ISD::XOR || + (N0.getOpcode() == ISD::AND && + N0.getOperand(0).getOpcode() == ISD::XOR && + N0.getOperand(1) == N0.getOperand(0).getOperand(1))) && + isa<ConstantSDNode>(N0.getOperand(1)) && + cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue() == 1) { + // If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We + // can only do this if the top bits are known zero. + unsigned BitWidth = N0.getValueSizeInBits(); + if (DAG.MaskedValueIsZero(N0, + APInt::getHighBitsSet(BitWidth, + BitWidth-1))) { + // Okay, get the un-inverted input value. 
+ SDValue Val; + if (N0.getOpcode() == ISD::XOR) + Val = N0.getOperand(0); + else { + assert(N0.getOpcode() == ISD::AND && + N0.getOperand(0).getOpcode() == ISD::XOR); + // ((X^1)&1)^1 -> X & 1 + Val = DAG.getNode(ISD::AND, dl, N0.getValueType(), + N0.getOperand(0).getOperand(0), + N0.getOperand(1)); + } + return DAG.getSetCC(dl, VT, Val, N1, + Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ); + } } + } + + APInt MinVal, MaxVal; + unsigned OperandBitSize = N1C->getValueType(0).getSizeInBits(); + if (ISD::isSignedIntSetCC(Cond)) { + MinVal = APInt::getSignedMinValue(OperandBitSize); + MaxVal = APInt::getSignedMaxValue(OperandBitSize); + } else { + MinVal = APInt::getMinValue(OperandBitSize); + MaxVal = APInt::getMaxValue(OperandBitSize); + } - if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal) - return DAG.getConstant(0, VT); // X < MIN --> false - if ((Cond == ISD::SETGE || Cond == ISD::SETUGE) && C1 == MinVal) - return DAG.getConstant(1, VT); // X >= MIN --> true - if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal) - return DAG.getConstant(0, VT); // X > MAX --> false - if ((Cond == ISD::SETLE || Cond == ISD::SETULE) && C1 == MaxVal) - return DAG.getConstant(1, VT); // X <= MAX --> true - - // Canonicalize setgt X, Min --> setne X, Min - if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MinVal) - return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE); - // Canonicalize setlt X, Max --> setne X, Max - if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MaxVal) - return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE); - - // If we have setult X, 1, turn it into seteq X, 0 - if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal+1) - return DAG.getSetCC(dl, VT, N0, - DAG.getConstant(MinVal, N0.getValueType()), - ISD::SETEQ); - // If we have setugt X, Max-1, turn it into seteq X, Max - else if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal-1) - return DAG.getSetCC(dl, VT, N0, - DAG.getConstant(MaxVal, N0.getValueType()), - 
ISD::SETEQ); - - // If we have "setcc X, C0", check to see if we can shrink the immediate - // by changing cc. - - // SETUGT X, SINTMAX -> SETLT X, 0 - if (Cond == ISD::SETUGT && - C1 == APInt::getSignedMaxValue(OperandBitSize)) - return DAG.getSetCC(dl, VT, N0, - DAG.getConstant(0, N1.getValueType()), - ISD::SETLT); - - // SETULT X, SINTMIN -> SETGT X, -1 - if (Cond == ISD::SETULT && - C1 == APInt::getSignedMinValue(OperandBitSize)) { - SDValue ConstMinusOne = - DAG.getConstant(APInt::getAllOnesValue(OperandBitSize), - N1.getValueType()); - return DAG.getSetCC(dl, VT, N0, ConstMinusOne, ISD::SETGT); - } + // Canonicalize GE/LE comparisons to use GT/LT comparisons. + if (Cond == ISD::SETGE || Cond == ISD::SETUGE) { + if (C1 == MinVal) return DAG.getConstant(1, VT); // X >= MIN --> true + // X >= C0 --> X > (C0-1) + return DAG.getSetCC(dl, VT, N0, + DAG.getConstant(C1-1, N1.getValueType()), + (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT); + } - // Fold bit comparisons when we can. - if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && - VT == N0.getValueType() && N0.getOpcode() == ISD::AND) - if (ConstantSDNode *AndRHS = - dyn_cast<ConstantSDNode>(N0.getOperand(1))) { - MVT ShiftTy = DCI.isBeforeLegalize() ? - getPointerTy() : getShiftAmountTy(); - if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3 - // Perform the xform if the AND RHS is a single bit. - if (isPowerOf2_64(AndRHS->getZExtValue())) { - return DAG.getNode(ISD::SRL, dl, VT, N0, - DAG.getConstant(Log2_64(AndRHS->getZExtValue()), - ShiftTy)); - } - } else if (Cond == ISD::SETEQ && C1 == AndRHS->getZExtValue()) { - // (X & 8) == 8 --> (X & 8) >> 3 - // Perform the xform if C1 is a single bit. 
- if (C1.isPowerOf2()) { - return DAG.getNode(ISD::SRL, dl, VT, N0, - DAG.getConstant(C1.logBase2(), ShiftTy)); - } + if (Cond == ISD::SETLE || Cond == ISD::SETULE) { + if (C1 == MaxVal) return DAG.getConstant(1, VT); // X <= MAX --> true + // X <= C0 --> X < (C0+1) + return DAG.getSetCC(dl, VT, N0, + DAG.getConstant(C1+1, N1.getValueType()), + (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT); + } + + if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal) + return DAG.getConstant(0, VT); // X < MIN --> false + if ((Cond == ISD::SETGE || Cond == ISD::SETUGE) && C1 == MinVal) + return DAG.getConstant(1, VT); // X >= MIN --> true + if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal) + return DAG.getConstant(0, VT); // X > MAX --> false + if ((Cond == ISD::SETLE || Cond == ISD::SETULE) && C1 == MaxVal) + return DAG.getConstant(1, VT); // X <= MAX --> true + + // Canonicalize setgt X, Min --> setne X, Min + if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MinVal) + return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE); + // Canonicalize setlt X, Max --> setne X, Max + if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MaxVal) + return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE); + + // If we have setult X, 1, turn it into seteq X, 0 + if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal+1) + return DAG.getSetCC(dl, VT, N0, + DAG.getConstant(MinVal, N0.getValueType()), + ISD::SETEQ); + // If we have setugt X, Max-1, turn it into seteq X, Max + else if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal-1) + return DAG.getSetCC(dl, VT, N0, + DAG.getConstant(MaxVal, N0.getValueType()), + ISD::SETEQ); + + // If we have "setcc X, C0", check to see if we can shrink the immediate + // by changing cc. 
+ + // SETUGT X, SINTMAX -> SETLT X, 0 + if (Cond == ISD::SETUGT && + C1 == APInt::getSignedMaxValue(OperandBitSize)) + return DAG.getSetCC(dl, VT, N0, + DAG.getConstant(0, N1.getValueType()), + ISD::SETLT); + + // SETULT X, SINTMIN -> SETGT X, -1 + if (Cond == ISD::SETULT && + C1 == APInt::getSignedMinValue(OperandBitSize)) { + SDValue ConstMinusOne = + DAG.getConstant(APInt::getAllOnesValue(OperandBitSize), + N1.getValueType()); + return DAG.getSetCC(dl, VT, N0, ConstMinusOne, ISD::SETGT); + } + + // Fold bit comparisons when we can. + if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && + VT == N0.getValueType() && N0.getOpcode() == ISD::AND) + if (ConstantSDNode *AndRHS = + dyn_cast<ConstantSDNode>(N0.getOperand(1))) { + EVT ShiftTy = DCI.isBeforeLegalize() ? + getPointerTy() : getShiftAmountTy(); + if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3 + // Perform the xform if the AND RHS is a single bit. + if (isPowerOf2_64(AndRHS->getZExtValue())) { + return DAG.getNode(ISD::SRL, dl, VT, N0, + DAG.getConstant(Log2_64(AndRHS->getZExtValue()), + ShiftTy)); + } + } else if (Cond == ISD::SETEQ && C1 == AndRHS->getZExtValue()) { + // (X & 8) == 8 --> (X & 8) >> 3 + // Perform the xform if C1 is a single bit. + if (C1.isPowerOf2()) { + return DAG.getNode(ISD::SRL, dl, VT, N0, + DAG.getConstant(C1.logBase2(), ShiftTy)); } } - } - } else if (isa<ConstantSDNode>(N0.getNode())) { - // Ensure that the constant occurs on the RHS. - return DAG.getSetCC(dl, VT, N1, N0, ISD::getSetCCSwappedOperands(Cond)); + } } if (isa<ConstantFPSDNode>(N0.getNode())) { @@ -1840,7 +1906,7 @@ TargetLowering::SimplifySetCC(MVT VT, SDValue N0, SDValue N1, if (CFP->getValueAPF().isNaN()) { // If an operand is known to be a nan, we can fold it. switch (ISD::getUnorderedFlavor(Cond)) { - default: assert(0 && "Unknown flavor!"); + default: llvm_unreachable("Unknown flavor!"); case 0: // Known false. return DAG.getConstant(0, VT); case 1: // Known true. 
@@ -1856,6 +1922,43 @@ TargetLowering::SimplifySetCC(MVT VT, SDValue N0, SDValue N1, // materialize 0.0. if (Cond == ISD::SETO || Cond == ISD::SETUO) return DAG.getSetCC(dl, VT, N0, N0, Cond); + + // If the condition is not legal, see if we can find an equivalent one + // which is legal. + if (!isCondCodeLegal(Cond, N0.getValueType())) { + // If the comparison was an awkward floating-point == or != and one of + // the comparison operands is infinity or negative infinity, convert the + // condition to a less-awkward <= or >=. + if (CFP->getValueAPF().isInfinity()) { + if (CFP->getValueAPF().isNegative()) { + if (Cond == ISD::SETOEQ && + isCondCodeLegal(ISD::SETOLE, N0.getValueType())) + return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOLE); + if (Cond == ISD::SETUEQ && + isCondCodeLegal(ISD::SETOLE, N0.getValueType())) + return DAG.getSetCC(dl, VT, N0, N1, ISD::SETULE); + if (Cond == ISD::SETUNE && + isCondCodeLegal(ISD::SETUGT, N0.getValueType())) + return DAG.getSetCC(dl, VT, N0, N1, ISD::SETUGT); + if (Cond == ISD::SETONE && + isCondCodeLegal(ISD::SETUGT, N0.getValueType())) + return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOGT); + } else { + if (Cond == ISD::SETOEQ && + isCondCodeLegal(ISD::SETOGE, N0.getValueType())) + return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOGE); + if (Cond == ISD::SETUEQ && + isCondCodeLegal(ISD::SETOGE, N0.getValueType())) + return DAG.getSetCC(dl, VT, N0, N1, ISD::SETUGE); + if (Cond == ISD::SETUNE && + isCondCodeLegal(ISD::SETULT, N0.getValueType())) + return DAG.getSetCC(dl, VT, N0, N1, ISD::SETULT); + if (Cond == ISD::SETONE && + isCondCodeLegal(ISD::SETULT, N0.getValueType())) + return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOLT); + } + } + } } if (N0 == N1) { @@ -2000,7 +2103,7 @@ TargetLowering::SimplifySetCC(MVT VT, SDValue N0, SDValue N1, SDValue Temp; if (N0.getValueType() == MVT::i1 && foldBooleans) { switch (Cond) { - default: assert(0 && "Unknown integer setcc!"); + default: llvm_unreachable("Unknown integer setcc!"); case ISD::SETEQ: 
// X == Y -> ~(X^Y) Temp = DAG.getNode(ISD::XOR, dl, MVT::i1, N0, N1); N0 = DAG.getNOT(dl, Temp, MVT::i1); @@ -2090,7 +2193,7 @@ bool TargetLowering::isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base, const MachineFrameInfo *MFI) const { if (LD->getChain() != Base->getChain()) return false; - MVT VT = LD->getValueType(0); + EVT VT = LD->getValueType(0); if (VT.getSizeInBits() / 8 != Bytes) return false; @@ -2171,7 +2274,7 @@ TargetLowering::getConstraintType(const std::string &Constraint) const { /// LowerXConstraint - try to replace an X constraint, which matches anything, /// with another that has more specific requirements based on the type of the /// corresponding operand. -const char *TargetLowering::LowerXConstraint(MVT ConstraintVT) const{ +const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const{ if (ConstraintVT.isInteger()) return "r"; if (ConstraintVT.isFloatingPoint()) @@ -2244,14 +2347,14 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op, std::vector<unsigned> TargetLowering:: getRegClassForInlineAsmConstraint(const std::string &Constraint, - MVT VT) const { + EVT VT) const { return std::vector<unsigned>(); } std::pair<unsigned, const TargetRegisterClass*> TargetLowering:: getRegForInlineAsmConstraint(const std::string &Constraint, - MVT VT) const { + EVT VT) const { if (Constraint[0] != '{') return std::pair<unsigned, const TargetRegisterClass*>(0, 0); assert(*(Constraint.end()-1) == '}' && "Not a brace enclosed constraint?"); @@ -2280,7 +2383,7 @@ getRegForInlineAsmConstraint(const std::string &Constraint, for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); I != E; ++I) { - if (StringsEqualNoCase(RegName, RI->get(*I).AsmName)) + if (StringsEqualNoCase(RegName, RI->getName(*I))) return std::make_pair(*I, RC); } } @@ -2310,7 +2413,7 @@ unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const { /// is. 
static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) { switch (CT) { - default: assert(0 && "Unknown constraint type!"); + default: llvm_unreachable("Unknown constraint type!"); case TargetLowering::C_Other: case TargetLowering::C_Unknown: return 0; @@ -2406,10 +2509,13 @@ void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo, // 'X' matches anything. if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) { // Labels and constants are handled elsewhere ('X' is the only thing - // that matches labels). - if (isa<BasicBlock>(OpInfo.CallOperandVal) || - isa<ConstantInt>(OpInfo.CallOperandVal)) + // that matches labels). For Functions, the type here is the type of + // the result, which is not what we want to look at; leave them alone. + Value *v = OpInfo.CallOperandVal; + if (isa<BasicBlock>(v) || isa<ConstantInt>(v) || isa<Function>(v)) { + OpInfo.CallOperandVal = v; return; + } // Otherwise, try to resolve it to something we know about by looking at // the actual operand type. @@ -2464,7 +2570,7 @@ bool TargetLowering::isLegalAddressingMode(const AddrMode &AM, /// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html> SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG, std::vector<SDNode*>* Created) const { - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); DebugLoc dl= N->getDebugLoc(); // Check to see if we can do this. @@ -2521,7 +2627,7 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG, /// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html> SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG, std::vector<SDNode*>* Created) const { - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); DebugLoc dl = N->getDebugLoc(); // Check to see if we can do this. 
@@ -2569,45 +2675,3 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG, DAG.getConstant(magics.s-1, getShiftAmountTy())); } } - -/// IgnoreHarmlessInstructions - Ignore instructions between a CALL and RET -/// node that don't prevent tail call optimization. -static SDValue IgnoreHarmlessInstructions(SDValue node) { - // Found call return. - if (node.getOpcode() == ISD::CALL) return node; - // Ignore MERGE_VALUES. Will have at least one operand. - if (node.getOpcode() == ISD::MERGE_VALUES) - return IgnoreHarmlessInstructions(node.getOperand(0)); - // Ignore ANY_EXTEND node. - if (node.getOpcode() == ISD::ANY_EXTEND) - return IgnoreHarmlessInstructions(node.getOperand(0)); - if (node.getOpcode() == ISD::TRUNCATE) - return IgnoreHarmlessInstructions(node.getOperand(0)); - // Any other node type. - return node; -} - -bool TargetLowering::CheckTailCallReturnConstraints(CallSDNode *TheCall, - SDValue Ret) { - unsigned NumOps = Ret.getNumOperands(); - // ISD::CALL results:(value0, ..., valuen, chain) - // ISD::RET operands:(chain, value0, flag0, ..., valuen, flagn) - // Value return: - // Check that operand of the RET node sources from the CALL node. The RET node - // has at least two operands. Operand 0 holds the chain. Operand 1 holds the - // value. - // Also we need to check that there is no code in between the call and the - // return. Hence we also check that the incomming chain to the return sources - // from the outgoing chain of the call. - if (NumOps > 1 && - IgnoreHarmlessInstructions(Ret.getOperand(1)) == SDValue(TheCall,0) && - Ret.getOperand(0) == SDValue(TheCall, TheCall->getNumValues()-1)) - return true; - // void return: The RET node has the chain result value of the CALL node as - // input. 
- if (NumOps == 1 && - Ret.getOperand(0) == SDValue(TheCall, TheCall->getNumValues()-1)) - return true; - - return false; -} diff --git a/lib/CodeGen/ShadowStackGC.cpp b/lib/CodeGen/ShadowStackGC.cpp index 2402f81..25a499b88 100644 --- a/lib/CodeGen/ShadowStackGC.cpp +++ b/lib/CodeGen/ShadowStackGC.cpp @@ -62,9 +62,11 @@ namespace { Constant *GetFrameMap(Function &F); const Type* GetConcreteStackEntryType(Function &F); void CollectRoots(Function &F); - static GetElementPtrInst *CreateGEP(IRBuilder<> &B, Value *BasePtr, + static GetElementPtrInst *CreateGEP(LLVMContext &Context, + IRBuilder<> &B, Value *BasePtr, int Idx1, const char *Name); - static GetElementPtrInst *CreateGEP(IRBuilder<> &B, Value *BasePtr, + static GetElementPtrInst *CreateGEP(LLVMContext &Context, + IRBuilder<> &B, Value *BasePtr, int Idx1, int Idx2, const char *Name); }; @@ -93,7 +95,7 @@ namespace { public: EscapeEnumerator(Function &F, const char *N = "cleanup") - : F(F), CleanupBBName(N), State(0) {} + : F(F), CleanupBBName(N), State(0), Builder(F.getContext()) {} IRBuilder<> *Next() { switch (State) { @@ -136,8 +138,9 @@ namespace { return 0; // Create a cleanup block. - BasicBlock *CleanupBB = BasicBlock::Create(CleanupBBName, &F); - UnwindInst *UI = new UnwindInst(CleanupBB); + BasicBlock *CleanupBB = BasicBlock::Create(F.getContext(), + CleanupBBName, &F); + UnwindInst *UI = new UnwindInst(F.getContext(), CleanupBB); // Transform the 'call' instructions into 'invoke's branching to the // cleanup block. Go in reverse order to make prettier BB names. @@ -186,8 +189,7 @@ ShadowStackGC::ShadowStackGC() : Head(0), StackEntryTy(0) { Constant *ShadowStackGC::GetFrameMap(Function &F) { // doInitialization creates the abstract type of this value. - - Type *VoidPtr = PointerType::getUnqual(Type::Int8Ty); + const Type *VoidPtr = Type::getInt8PtrTy(F.getContext()); // Truncate the ShadowStackDescriptor if some metadata is null. 
unsigned NumMeta = 0; @@ -200,17 +202,18 @@ Constant *ShadowStackGC::GetFrameMap(Function &F) { } Constant *BaseElts[] = { - ConstantInt::get(Type::Int32Ty, Roots.size(), false), - ConstantInt::get(Type::Int32Ty, NumMeta, false), + ConstantInt::get(Type::getInt32Ty(F.getContext()), Roots.size(), false), + ConstantInt::get(Type::getInt32Ty(F.getContext()), NumMeta, false), }; Constant *DescriptorElts[] = { - ConstantStruct::get(BaseElts, 2), + ConstantStruct::get(F.getContext(), BaseElts, 2, false), ConstantArray::get(ArrayType::get(VoidPtr, NumMeta), Metadata.begin(), NumMeta) }; - Constant *FrameMap = ConstantStruct::get(DescriptorElts, 2); + Constant *FrameMap = ConstantStruct::get(F.getContext(), DescriptorElts, 2, + false); std::string TypeName("gc_map."); TypeName += utostr(NumMeta); @@ -229,13 +232,14 @@ Constant *ShadowStackGC::GetFrameMap(Function &F) { // to be a ModulePass (which means it cannot be in the 'llc' pipeline // (which uses a FunctionPassManager (which segfaults (not asserts) if // provided a ModulePass))). 
- Constant *GV = new GlobalVariable(FrameMap->getType(), true, + Constant *GV = new GlobalVariable(*F.getParent(), FrameMap->getType(), true, GlobalVariable::InternalLinkage, - FrameMap, "__gc_" + F.getName(), - F.getParent()); + FrameMap, "__gc_" + F.getName()); - Constant *GEPIndices[2] = { ConstantInt::get(Type::Int32Ty, 0), - ConstantInt::get(Type::Int32Ty, 0) }; + Constant *GEPIndices[2] = { + ConstantInt::get(Type::getInt32Ty(F.getContext()), 0), + ConstantInt::get(Type::getInt32Ty(F.getContext()), 0) + }; return ConstantExpr::getGetElementPtr(GV, GEPIndices, 2); } @@ -245,7 +249,7 @@ const Type* ShadowStackGC::GetConcreteStackEntryType(Function &F) { EltTys.push_back(StackEntryTy); for (size_t I = 0; I != Roots.size(); I++) EltTys.push_back(Roots[I].second->getAllocatedType()); - Type *Ty = StructType::get(EltTys); + Type *Ty = StructType::get(F.getContext(), EltTys); std::string TypeName("gc_stackentry."); TypeName += F.getName(); @@ -263,9 +267,11 @@ bool ShadowStackGC::initializeCustomLowering(Module &M) { // void *Meta[]; // May be absent for roots without metadata. // }; std::vector<const Type*> EltTys; - EltTys.push_back(Type::Int32Ty); // 32 bits is ok up to a 32GB stack frame. :) - EltTys.push_back(Type::Int32Ty); // Specifies length of variable length array. - StructType *FrameMapTy = StructType::get(EltTys); + // 32 bits is ok up to a 32GB stack frame. :) + EltTys.push_back(Type::getInt32Ty(M.getContext())); + // Specifies length of variable length array. + EltTys.push_back(Type::getInt32Ty(M.getContext())); + StructType *FrameMapTy = StructType::get(M.getContext(), EltTys); M.addTypeName("gc_map", FrameMapTy); PointerType *FrameMapPtrTy = PointerType::getUnqual(FrameMapTy); @@ -274,12 +280,12 @@ bool ShadowStackGC::initializeCustomLowering(Module &M) { // FrameMap *Map; // Pointer to constant FrameMap. // void *Roots[]; // Stack roots (in-place array, so we pretend). 
// }; - OpaqueType *RecursiveTy = OpaqueType::get(); + OpaqueType *RecursiveTy = OpaqueType::get(M.getContext()); EltTys.clear(); EltTys.push_back(PointerType::getUnqual(RecursiveTy)); EltTys.push_back(FrameMapPtrTy); - PATypeHolder LinkTyH = StructType::get(EltTys); + PATypeHolder LinkTyH = StructType::get(M.getContext(), EltTys); RecursiveTy->refineAbstractTypeTo(LinkTyH.get()); StackEntryTy = cast<StructType>(LinkTyH.get()); @@ -292,10 +298,10 @@ bool ShadowStackGC::initializeCustomLowering(Module &M) { if (!Head) { // If the root chain does not exist, insert a new one with linkonce // linkage! - Head = new GlobalVariable(StackEntryPtrTy, false, + Head = new GlobalVariable(M, StackEntryPtrTy, false, GlobalValue::LinkOnceAnyLinkage, Constant::getNullValue(StackEntryPtrTy), - "llvm_gc_root_chain", &M); + "llvm_gc_root_chain"); } else if (Head->hasExternalLinkage() && Head->isDeclaration()) { Head->setInitializer(Constant::getNullValue(StackEntryPtrTy)); Head->setLinkage(GlobalValue::LinkOnceAnyLinkage); @@ -338,11 +344,11 @@ void ShadowStackGC::CollectRoots(Function &F) { } GetElementPtrInst * -ShadowStackGC::CreateGEP(IRBuilder<> &B, Value *BasePtr, +ShadowStackGC::CreateGEP(LLVMContext &Context, IRBuilder<> &B, Value *BasePtr, int Idx, int Idx2, const char *Name) { - Value *Indices[] = { ConstantInt::get(Type::Int32Ty, 0), - ConstantInt::get(Type::Int32Ty, Idx), - ConstantInt::get(Type::Int32Ty, Idx2) }; + Value *Indices[] = { ConstantInt::get(Type::getInt32Ty(Context), 0), + ConstantInt::get(Type::getInt32Ty(Context), Idx), + ConstantInt::get(Type::getInt32Ty(Context), Idx2) }; Value* Val = B.CreateGEP(BasePtr, Indices, Indices + 3, Name); assert(isa<GetElementPtrInst>(Val) && "Unexpected folded constant"); @@ -351,10 +357,10 @@ ShadowStackGC::CreateGEP(IRBuilder<> &B, Value *BasePtr, } GetElementPtrInst * -ShadowStackGC::CreateGEP(IRBuilder<> &B, Value *BasePtr, +ShadowStackGC::CreateGEP(LLVMContext &Context, IRBuilder<> &B, Value *BasePtr, int Idx, const char 
*Name) { - Value *Indices[] = { ConstantInt::get(Type::Int32Ty, 0), - ConstantInt::get(Type::Int32Ty, Idx) }; + Value *Indices[] = { ConstantInt::get(Type::getInt32Ty(Context), 0), + ConstantInt::get(Type::getInt32Ty(Context), Idx) }; Value *Val = B.CreateGEP(BasePtr, Indices, Indices + 2, Name); assert(isa<GetElementPtrInst>(Val) && "Unexpected folded constant"); @@ -364,6 +370,8 @@ ShadowStackGC::CreateGEP(IRBuilder<> &B, Value *BasePtr, /// runOnFunction - Insert code to maintain the shadow stack. bool ShadowStackGC::performCustomLowering(Function &F) { + LLVMContext &Context = F.getContext(); + // Find calls to llvm.gcroot. CollectRoots(F); @@ -388,13 +396,14 @@ bool ShadowStackGC::performCustomLowering(Function &F) { // Initialize the map pointer and load the current head of the shadow stack. Instruction *CurrentHead = AtEntry.CreateLoad(Head, "gc_currhead"); - Instruction *EntryMapPtr = CreateGEP(AtEntry, StackEntry,0,1,"gc_frame.map"); + Instruction *EntryMapPtr = CreateGEP(Context, AtEntry, StackEntry, + 0,1,"gc_frame.map"); AtEntry.CreateStore(FrameMap, EntryMapPtr); // After all the allocas... for (unsigned I = 0, E = Roots.size(); I != E; ++I) { // For each root, find the corresponding slot in the aggregate... - Value *SlotPtr = CreateGEP(AtEntry, StackEntry, 1 + I, "gc_root"); + Value *SlotPtr = CreateGEP(Context, AtEntry, StackEntry, 1 + I, "gc_root"); // And use it in lieu of the alloca. AllocaInst *OriginalAlloca = Roots[I].second; @@ -410,17 +419,19 @@ bool ShadowStackGC::performCustomLowering(Function &F) { AtEntry.SetInsertPoint(IP->getParent(), IP); // Push the entry onto the shadow stack. 
- Instruction *EntryNextPtr = CreateGEP(AtEntry,StackEntry,0,0,"gc_frame.next"); - Instruction *NewHeadVal = CreateGEP(AtEntry,StackEntry, 0, "gc_newhead"); - AtEntry.CreateStore(CurrentHead, EntryNextPtr); - AtEntry.CreateStore(NewHeadVal, Head); + Instruction *EntryNextPtr = CreateGEP(Context, AtEntry, + StackEntry,0,0,"gc_frame.next"); + Instruction *NewHeadVal = CreateGEP(Context, AtEntry, + StackEntry, 0, "gc_newhead"); + AtEntry.CreateStore(CurrentHead, EntryNextPtr); + AtEntry.CreateStore(NewHeadVal, Head); // For each instruction that escapes... EscapeEnumerator EE(F, "gc_cleanup"); while (IRBuilder<> *AtExit = EE.Next()) { // Pop the entry from the shadow stack. Don't reuse CurrentHead from // AtEntry, since that would make the value live for the entire function. - Instruction *EntryNextPtr2 = CreateGEP(*AtExit, StackEntry, 0, 0, + Instruction *EntryNextPtr2 = CreateGEP(Context, *AtExit, StackEntry, 0, 0, "gc_frame.next"); Value *SavedHead = AtExit->CreateLoad(EntryNextPtr2, "gc_savedhead"); AtExit->CreateStore(SavedHead, Head); diff --git a/lib/CodeGen/ShrinkWrapping.cpp b/lib/CodeGen/ShrinkWrapping.cpp index e44a138..8070570 100644 --- a/lib/CodeGen/ShrinkWrapping.cpp +++ b/lib/CodeGen/ShrinkWrapping.cpp @@ -158,7 +158,7 @@ void PEI::initShrinkWrappingInfo() { // via --shrink-wrap-func=<funcname>. 
#ifndef NDEBUG if (ShrinkWrapFunc != "") { - std::string MFName = MF->getFunction()->getName(); + std::string MFName = MF->getFunction()->getNameStr(); ShrinkWrapThisFunction = (MFName == ShrinkWrapFunc); } #endif @@ -185,8 +185,8 @@ void PEI::placeCSRSpillsAndRestores(MachineFunction &Fn) { initShrinkWrappingInfo(); DEBUG(if (ShrinkWrapThisFunction) { - DOUT << "Place CSR spills/restores for " - << MF->getFunction()->getName() << "\n"; + errs() << "Place CSR spills/restores for " + << MF->getFunction()->getName() << "\n"; }); if (calculateSets(Fn)) @@ -297,20 +297,26 @@ void PEI::calculateAnticAvail(MachineFunction &Fn) { } } - DEBUG(if (ShrinkWrapDebugging >= Details) { - DOUT << "-----------------------------------------------------------\n"; - DOUT << " Antic/Avail Sets:\n"; - DOUT << "-----------------------------------------------------------\n"; - DOUT << "iterations = " << iterations << "\n"; - DOUT << "-----------------------------------------------------------\n"; - DOUT << "MBB | USED | ANTIC_IN | ANTIC_OUT | AVAIL_IN | AVAIL_OUT\n"; - DOUT << "-----------------------------------------------------------\n"; - for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end(); - MBBI != MBBE; ++MBBI) { - MachineBasicBlock* MBB = MBBI; - dumpSets(MBB); + DEBUG({ + if (ShrinkWrapDebugging >= Details) { + errs() + << "-----------------------------------------------------------\n" + << " Antic/Avail Sets:\n" + << "-----------------------------------------------------------\n" + << "iterations = " << iterations << "\n" + << "-----------------------------------------------------------\n" + << "MBB | USED | ANTIC_IN | ANTIC_OUT | AVAIL_IN | AVAIL_OUT\n" + << "-----------------------------------------------------------\n"; + + for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end(); + MBBI != MBBE; ++MBBI) { + MachineBasicBlock* MBB = MBBI; + dumpSets(MBB); + } + + errs() + << "-----------------------------------------------------------\n"; } - DOUT << 
"-----------------------------------------------------------\n"; }); } @@ -357,8 +363,8 @@ bool PEI::calculateSets(MachineFunction &Fn) { // If no CSRs used, we are done. if (CSI.empty()) { DEBUG(if (ShrinkWrapThisFunction) - DOUT << "DISABLED: " << Fn.getFunction()->getName() - << ": uses no callee-saved registers\n"); + errs() << "DISABLED: " << Fn.getFunction()->getName() + << ": uses no callee-saved registers\n"); return false; } @@ -377,8 +383,8 @@ bool PEI::calculateSets(MachineFunction &Fn) { // implementation to functions with <= 500 MBBs. if (Fn.size() > 500) { DEBUG(if (ShrinkWrapThisFunction) - DOUT << "DISABLED: " << Fn.getFunction()->getName() - << ": too large (" << Fn.size() << " MBBs)\n"); + errs() << "DISABLED: " << Fn.getFunction()->getName() + << ": too large (" << Fn.size() << " MBBs)\n"); ShrinkWrapThisFunction = false; } @@ -459,8 +465,8 @@ bool PEI::calculateSets(MachineFunction &Fn) { } if (allCSRUsesInEntryBlock) { - DEBUG(DOUT << "DISABLED: " << Fn.getFunction()->getName() - << ": all CSRs used in EntryBlock\n"); + DEBUG(errs() << "DISABLED: " << Fn.getFunction()->getName() + << ": all CSRs used in EntryBlock\n"); ShrinkWrapThisFunction = false; } else { bool allCSRsUsedInEntryFanout = true; @@ -471,8 +477,8 @@ bool PEI::calculateSets(MachineFunction &Fn) { allCSRsUsedInEntryFanout = false; } if (allCSRsUsedInEntryFanout) { - DEBUG(DOUT << "DISABLED: " << Fn.getFunction()->getName() - << ": all CSRs used in imm successors of EntryBlock\n"); + DEBUG(errs() << "DISABLED: " << Fn.getFunction()->getName() + << ": all CSRs used in imm successors of EntryBlock\n"); ShrinkWrapThisFunction = false; } } @@ -498,9 +504,9 @@ bool PEI::calculateSets(MachineFunction &Fn) { if (dominatesExitNodes) { CSRUsedInChokePoints |= CSRUsed[MBB]; if (CSRUsedInChokePoints == UsedCSRegs) { - DEBUG(DOUT << "DISABLED: " << Fn.getFunction()->getName() - << ": all CSRs used in choke point(s) at " - << getBasicBlockName(MBB) << "\n"); + DEBUG(errs() << "DISABLED: " << 
Fn.getFunction()->getName() + << ": all CSRs used in choke point(s) at " + << getBasicBlockName(MBB) << "\n"); ShrinkWrapThisFunction = false; break; } @@ -514,16 +520,16 @@ bool PEI::calculateSets(MachineFunction &Fn) { return false; DEBUG({ - DOUT << "ENABLED: " << Fn.getFunction()->getName(); + errs() << "ENABLED: " << Fn.getFunction()->getName(); if (HasFastExitPath) - DOUT << " (fast exit path)"; - DOUT << "\n"; + errs() << " (fast exit path)"; + errs() << "\n"; if (ShrinkWrapDebugging >= BasicInfo) { - DOUT << "------------------------------" + errs() << "------------------------------" << "-----------------------------\n"; - DOUT << "UsedCSRegs = " << stringifyCSRegSet(UsedCSRegs) << "\n"; + errs() << "UsedCSRegs = " << stringifyCSRegSet(UsedCSRegs) << "\n"; if (ShrinkWrapDebugging >= Details) { - DOUT << "------------------------------" + errs() << "------------------------------" << "-----------------------------\n"; dumpAllUsed(); } @@ -596,7 +602,7 @@ bool PEI::addUsesForMEMERegion(MachineBasicBlock* MBB, addedUses = true; blks.push_back(SUCC); DEBUG(if (ShrinkWrapDebugging >= Iterations) - DOUT << getBasicBlockName(MBB) + errs() << getBasicBlockName(MBB) << "(" << stringifyCSRegSet(prop) << ")->" << "successor " << getBasicBlockName(SUCC) << "\n"); } @@ -612,7 +618,7 @@ bool PEI::addUsesForMEMERegion(MachineBasicBlock* MBB, addedUses = true; blks.push_back(PRED); DEBUG(if (ShrinkWrapDebugging >= Iterations) - DOUT << getBasicBlockName(MBB) + errs() << getBasicBlockName(MBB) << "(" << stringifyCSRegSet(prop) << ")->" << "predecessor " << getBasicBlockName(PRED) << "\n"); } @@ -650,7 +656,7 @@ bool PEI::addUsesForTopLevelLoops(SmallVector<MachineBasicBlock*, 4>& blks) { CSRUsed[EXB] |= loopSpills; addedUses = true; DEBUG(if (ShrinkWrapDebugging >= Iterations) - DOUT << "LOOP " << getBasicBlockName(MBB) + errs() << "LOOP " << getBasicBlockName(MBB) << "(" << stringifyCSRegSet(loopSpills) << ")->" << getBasicBlockName(EXB) << "\n"); if (EXB->succ_size() > 1 
|| EXB->pred_size() > 1) @@ -717,7 +723,7 @@ bool PEI::calcSpillPlacements(MachineBasicBlock* MBB, blks.push_back(MBB); DEBUG(if (! CSRSave[MBB].empty() && ShrinkWrapDebugging >= Iterations) - DOUT << "SAVE[" << getBasicBlockName(MBB) << "] = " + errs() << "SAVE[" << getBasicBlockName(MBB) << "] = " << stringifyCSRegSet(CSRSave[MBB]) << "\n"); return placedSpills; @@ -778,7 +784,7 @@ bool PEI::calcRestorePlacements(MachineBasicBlock* MBB, blks.push_back(MBB); DEBUG(if (! CSRRestore[MBB].empty() && ShrinkWrapDebugging >= Iterations) - DOUT << "RESTORE[" << getBasicBlockName(MBB) << "] = " + errs() << "RESTORE[" << getBasicBlockName(MBB) << "] = " << stringifyCSRegSet(CSRRestore[MBB]) << "\n"); return placedRestores; @@ -802,7 +808,7 @@ void PEI::placeSpillsAndRestores(MachineFunction &Fn) { ++iterations; DEBUG(if (ShrinkWrapDebugging >= Iterations) - DOUT << "iter " << iterations + errs() << "iter " << iterations << " --------------------------------------------------\n"); // Calculate CSR{Save,Restore} sets using Antic, Avail on the MCFG, @@ -852,15 +858,15 @@ void PEI::placeSpillsAndRestores(MachineFunction &Fn) { unsigned numSRReducedThisFunc = notSpilledInEntryBlock.count(); numSRReduced += numSRReducedThisFunc; DEBUG(if (ShrinkWrapDebugging >= BasicInfo) { - DOUT << "-----------------------------------------------------------\n"; - DOUT << "total iterations = " << iterations << " ( " + errs() << "-----------------------------------------------------------\n"; + errs() << "total iterations = " << iterations << " ( " << Fn.getFunction()->getName() << " " << numSRReducedThisFunc << " " << Fn.size() << " )\n"; - DOUT << "-----------------------------------------------------------\n"; + errs() << "-----------------------------------------------------------\n"; dumpSRSets(); - DOUT << "-----------------------------------------------------------\n"; + errs() << "-----------------------------------------------------------\n"; if (numSRReducedThisFunc) 
verifySpillRestorePlacement(); }); @@ -893,7 +899,7 @@ void PEI::findFastExitPath() { // Check the immediate successors. if (isReturnBlock(SUCC)) { if (ShrinkWrapDebugging >= BasicInfo) - DOUT << "Fast exit path: " << getBasicBlockName(EntryBlock) + errs() << "Fast exit path: " << getBasicBlockName(EntryBlock) << "->" << getBasicBlockName(SUCC) << "\n"; break; } @@ -911,7 +917,7 @@ void PEI::findFastExitPath() { } if (HasFastExitPath) { if (ShrinkWrapDebugging >= BasicInfo) - DOUT << "Fast exit path: " << getBasicBlockName(EntryBlock) + errs() << "Fast exit path: " << getBasicBlockName(EntryBlock) << "->" << exitPath << "\n"; break; } @@ -945,10 +951,10 @@ void PEI::verifySpillRestorePlacement() { if (spilled.empty()) continue; - DOUT << "SAVE[" << getBasicBlockName(MBB) << "] = " - << stringifyCSRegSet(spilled) - << " RESTORE[" << getBasicBlockName(MBB) << "] = " - << stringifyCSRegSet(CSRRestore[MBB]) << "\n"; + DEBUG(errs() << "SAVE[" << getBasicBlockName(MBB) << "] = " + << stringifyCSRegSet(spilled) + << " RESTORE[" << getBasicBlockName(MBB) << "] = " + << stringifyCSRegSet(CSRRestore[MBB]) << "\n"); if (CSRRestore[MBB].intersects(spilled)) { restored |= (CSRRestore[MBB] & spilled); @@ -977,11 +983,11 @@ void PEI::verifySpillRestorePlacement() { if (isReturnBlock(SBB) || SBB->succ_size() == 0) { if (restored != spilled) { CSRegSet notRestored = (spilled - restored); - DOUT << MF->getFunction()->getName() << ": " - << stringifyCSRegSet(notRestored) - << " spilled at " << getBasicBlockName(MBB) - << " are never restored on path to return " - << getBasicBlockName(SBB) << "\n"; + DEBUG(errs() << MF->getFunction()->getName() << ": " + << stringifyCSRegSet(notRestored) + << " spilled at " << getBasicBlockName(MBB) + << " are never restored on path to return " + << getBasicBlockName(SBB) << "\n"); } restored.clear(); } @@ -998,10 +1004,10 @@ void PEI::verifySpillRestorePlacement() { if (restored.empty()) continue; - DOUT << "SAVE[" << getBasicBlockName(MBB) << "] = " 
- << stringifyCSRegSet(CSRSave[MBB]) - << " RESTORE[" << getBasicBlockName(MBB) << "] = " - << stringifyCSRegSet(restored) << "\n"; + DEBUG(errs() << "SAVE[" << getBasicBlockName(MBB) << "] = " + << stringifyCSRegSet(CSRSave[MBB]) + << " RESTORE[" << getBasicBlockName(MBB) << "] = " + << stringifyCSRegSet(restored) << "\n"); if (CSRSave[MBB].intersects(restored)) { spilled |= (CSRSave[MBB] & restored); @@ -1025,23 +1031,24 @@ void PEI::verifySpillRestorePlacement() { } if (spilled != restored) { CSRegSet notSpilled = (restored - spilled); - DOUT << MF->getFunction()->getName() << ": " - << stringifyCSRegSet(notSpilled) - << " restored at " << getBasicBlockName(MBB) - << " are never spilled\n"; + DEBUG(errs() << MF->getFunction()->getName() << ": " + << stringifyCSRegSet(notSpilled) + << " restored at " << getBasicBlockName(MBB) + << " are never spilled\n"); } } } // Debugging print methods. std::string PEI::getBasicBlockName(const MachineBasicBlock* MBB) { + if (!MBB) + return ""; + + if (MBB->getBasicBlock()) + return MBB->getBasicBlock()->getNameStr(); + std::ostringstream name; - if (MBB) { - if (MBB->getBasicBlock()) - name << MBB->getBasicBlock()->getName(); - else - name << "_MBB_" << MBB->getNumber(); - } + name << "_MBB_" << MBB->getNumber(); return name.str(); } @@ -1071,14 +1078,15 @@ std::string PEI::stringifyCSRegSet(const CSRegSet& s) { } void PEI::dumpSet(const CSRegSet& s) { - DOUT << stringifyCSRegSet(s) << "\n"; + DEBUG(errs() << stringifyCSRegSet(s) << "\n"); } void PEI::dumpUsed(MachineBasicBlock* MBB) { - if (MBB) { - DOUT << "CSRUsed[" << getBasicBlockName(MBB) << "] = " - << stringifyCSRegSet(CSRUsed[MBB]) << "\n"; - } + DEBUG({ + if (MBB) + errs() << "CSRUsed[" << getBasicBlockName(MBB) << "] = " + << stringifyCSRegSet(CSRUsed[MBB]) << "\n"; + }); } void PEI::dumpAllUsed() { @@ -1090,27 +1098,29 @@ void PEI::dumpAllUsed() { } void PEI::dumpSets(MachineBasicBlock* MBB) { - if (MBB) { - DOUT << getBasicBlockName(MBB) << " | " - << 
stringifyCSRegSet(CSRUsed[MBB]) << " | " - << stringifyCSRegSet(AnticIn[MBB]) << " | " - << stringifyCSRegSet(AnticOut[MBB]) << " | " - << stringifyCSRegSet(AvailIn[MBB]) << " | " - << stringifyCSRegSet(AvailOut[MBB]) << "\n"; - } + DEBUG({ + if (MBB) + errs() << getBasicBlockName(MBB) << " | " + << stringifyCSRegSet(CSRUsed[MBB]) << " | " + << stringifyCSRegSet(AnticIn[MBB]) << " | " + << stringifyCSRegSet(AnticOut[MBB]) << " | " + << stringifyCSRegSet(AvailIn[MBB]) << " | " + << stringifyCSRegSet(AvailOut[MBB]) << "\n"; + }); } void PEI::dumpSets1(MachineBasicBlock* MBB) { - if (MBB) { - DOUT << getBasicBlockName(MBB) << " | " - << stringifyCSRegSet(CSRUsed[MBB]) << " | " - << stringifyCSRegSet(AnticIn[MBB]) << " | " - << stringifyCSRegSet(AnticOut[MBB]) << " | " - << stringifyCSRegSet(AvailIn[MBB]) << " | " - << stringifyCSRegSet(AvailOut[MBB]) << " | " - << stringifyCSRegSet(CSRSave[MBB]) << " | " - << stringifyCSRegSet(CSRRestore[MBB]) << "\n"; - } + DEBUG({ + if (MBB) + errs() << getBasicBlockName(MBB) << " | " + << stringifyCSRegSet(CSRUsed[MBB]) << " | " + << stringifyCSRegSet(AnticIn[MBB]) << " | " + << stringifyCSRegSet(AnticOut[MBB]) << " | " + << stringifyCSRegSet(AvailIn[MBB]) << " | " + << stringifyCSRegSet(AvailOut[MBB]) << " | " + << stringifyCSRegSet(CSRSave[MBB]) << " | " + << stringifyCSRegSet(CSRRestore[MBB]) << "\n"; + }); } void PEI::dumpAllSets() { @@ -1122,20 +1132,21 @@ void PEI::dumpAllSets() { } void PEI::dumpSRSets() { - for (MachineFunction::iterator MBB = MF->begin(), E = MF->end(); - MBB != E; ++MBB) { - if (! CSRSave[MBB].empty()) { - DOUT << "SAVE[" << getBasicBlockName(MBB) << "] = " - << stringifyCSRegSet(CSRSave[MBB]); - if (CSRRestore[MBB].empty()) - DOUT << "\n"; - } - if (! CSRRestore[MBB].empty()) { - if (! 
CSRSave[MBB].empty()) - DOUT << " "; - DOUT << "RESTORE[" << getBasicBlockName(MBB) << "] = " - << stringifyCSRegSet(CSRRestore[MBB]) << "\n"; - } - } + DEBUG({ + for (MachineFunction::iterator MBB = MF->begin(), E = MF->end(); + MBB != E; ++MBB) { + if (!CSRSave[MBB].empty()) { + errs() << "SAVE[" << getBasicBlockName(MBB) << "] = " + << stringifyCSRegSet(CSRSave[MBB]); + if (CSRRestore[MBB].empty()) + errs() << '\n'; + } + + if (!CSRRestore[MBB].empty() && !CSRSave[MBB].empty()) + errs() << " " + << "RESTORE[" << getBasicBlockName(MBB) << "] = " + << stringifyCSRegSet(CSRRestore[MBB]) << "\n"; + } + }); } #endif diff --git a/lib/CodeGen/SimpleHazardRecognizer.h b/lib/CodeGen/SimpleHazardRecognizer.h new file mode 100644 index 0000000..f69feaf --- /dev/null +++ b/lib/CodeGen/SimpleHazardRecognizer.h @@ -0,0 +1,89 @@ +//=- llvm/CodeGen/SimpleHazardRecognizer.h - Scheduling Support -*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the SimpleHazardRecognizer class, which +// implements hazard-avoidance heuristics for scheduling, based on the +// scheduling itineraries specified for the target. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_SIMPLEHAZARDRECOGNIZER_H +#define LLVM_CODEGEN_SIMPLEHAZARDRECOGNIZER_H + +#include "llvm/CodeGen/ScheduleHazardRecognizer.h" +#include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetInstrInfo.h" + +namespace llvm { + /// SimpleHazardRecognizer - A *very* simple hazard recognizer. It uses + /// a coarse classification and attempts to avoid that instructions of + /// a given class aren't grouped too densely together. 
+ class SimpleHazardRecognizer : public ScheduleHazardRecognizer { + /// Class - A simple classification for SUnits. + enum Class { + Other, Load, Store + }; + + /// Window - The Class values of the most recently issued + /// instructions. + Class Window[8]; + + /// getClass - Classify the given SUnit. + Class getClass(const SUnit *SU) { + const MachineInstr *MI = SU->getInstr(); + const TargetInstrDesc &TID = MI->getDesc(); + if (TID.mayLoad()) + return Load; + if (TID.mayStore()) + return Store; + return Other; + } + + /// Step - Rotate the existing entries in Window and insert the + /// given class value in position as the most recent. + void Step(Class C) { + std::copy(Window+1, array_endof(Window), Window); + Window[array_lengthof(Window)-1] = C; + } + + public: + SimpleHazardRecognizer() : Window() { + Reset(); + } + + virtual HazardType getHazardType(SUnit *SU) { + Class C = getClass(SU); + if (C == Other) + return NoHazard; + unsigned Score = 0; + for (unsigned i = 0; i != array_lengthof(Window); ++i) + if (Window[i] == C) + Score += i + 1; + if (Score > array_lengthof(Window) * 2) + return Hazard; + return NoHazard; + } + + virtual void Reset() { + for (unsigned i = 0; i != array_lengthof(Window); ++i) + Window[i] = Other; + } + + virtual void EmitInstruction(SUnit *SU) { + Step(getClass(SU)); + } + + virtual void AdvanceCycle() { + Step(Other); + } + }; +} + +#endif diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp index 7e7d6b8..9c283b0 100644 --- a/lib/CodeGen/SimpleRegisterCoalescing.cpp +++ b/lib/CodeGen/SimpleRegisterCoalescing.cpp @@ -17,6 +17,7 @@ #include "VirtRegMap.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/Value.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineLoopInfo.h" @@ -28,6 +29,8 @@ #include "llvm/Target/TargetOptions.h" #include "llvm/Support/CommandLine.h" 
#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" @@ -51,13 +54,8 @@ EnableJoining("join-liveintervals", cl::init(true)); static cl::opt<bool> -NewHeuristic("new-coalescer-heuristic", - cl::desc("Use new coalescer heuristic"), - cl::init(false), cl::Hidden); - -static cl::opt<bool> -CrossClassJoin("join-cross-class-copies", - cl::desc("Coalesce cross register class copies"), +DisableCrossClassJoin("disable-cross-class-join", + cl::desc("Avoid coalescing cross register class copies"), cl::init(false), cl::Hidden); static cl::opt<bool> @@ -65,7 +63,7 @@ PhysJoinTweak("tweak-phys-join-heuristics", cl::desc("Tweak heuristics for joining phys reg with vr"), cl::init(false), cl::Hidden); -static RegisterPass<SimpleRegisterCoalescing> +static RegisterPass<SimpleRegisterCoalescing> X("simple-register-coalescing", "Simple Register Coalescing"); // Declare that we implement the RegisterCoalescer interface @@ -74,6 +72,8 @@ static RegisterAnalysisGroup<RegisterCoalescer, true/*The Default*/> V(X); const PassInfo *const llvm::SimpleRegisterCoalescingID = &X; void SimpleRegisterCoalescing::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequired<AliasAnalysis>(); AU.addRequired<LiveIntervals>(); AU.addPreserved<LiveIntervals>(); AU.addRequired<MachineLoopInfo>(); @@ -105,22 +105,23 @@ void SimpleRegisterCoalescing::getAnalysisUsage(AnalysisUsage &AU) const { bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA, LiveInterval &IntB, MachineInstr *CopyMI) { - unsigned CopyIdx = li_->getDefIndex(li_->getInstructionIndex(CopyMI)); + LiveIndex CopyIdx = li_->getDefIndex(li_->getInstructionIndex(CopyMI)); // BValNo is a value number in B that is defined by a copy from A. 'B3' in // the example above. 
LiveInterval::iterator BLR = IntB.FindLiveRangeContaining(CopyIdx); assert(BLR != IntB.end() && "Live range not found!"); VNInfo *BValNo = BLR->valno; - + // Get the location that B is defined at. Two options: either this value has - // an unknown definition point or it is defined at CopyIdx. If unknown, we + // an unknown definition point or it is defined at CopyIdx. If unknown, we // can't process it. - if (!BValNo->copy) return false; + if (!BValNo->getCopy()) return false; assert(BValNo->def == CopyIdx && "Copy doesn't define the value?"); - + // AValNo is the value number in A that defines the copy, A3 in the example. - LiveInterval::iterator ALR = IntA.FindLiveRangeContaining(CopyIdx-1); + LiveIndex CopyUseIdx = li_->getUseIndex(CopyIdx); + LiveInterval::iterator ALR = IntA.FindLiveRangeContaining(CopyUseIdx); assert(ALR != IntA.end() && "Live range not found!"); VNInfo *AValNo = ALR->valno; // If it's re-defined by an early clobber somewhere in the live range, then @@ -143,26 +144,28 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA, // The coalescer has no idea there was a def in the middle of [174,230]. if (AValNo->hasRedefByEC()) return false; - - // If AValNo is defined as a copy from IntB, we can potentially process this. + + // If AValNo is defined as a copy from IntB, we can potentially process this. // Get the instruction that defines this value number. unsigned SrcReg = li_->getVNInfoSourceReg(AValNo); if (!SrcReg) return false; // Not defined by a copy. - + // If the value number is not defined by a copy instruction, ignore it. // If the source register comes from an interval other than IntB, we can't // handle this. if (SrcReg != IntB.reg) return false; - + // Get the LiveRange in IntB that this value number starts with. 
- LiveInterval::iterator ValLR = IntB.FindLiveRangeContaining(AValNo->def-1); + LiveInterval::iterator ValLR = + IntB.FindLiveRangeContaining(li_->getPrevSlot(AValNo->def)); assert(ValLR != IntB.end() && "Live range not found!"); - + // Make sure that the end of the live range is inside the same block as // CopyMI. - MachineInstr *ValLREndInst = li_->getInstructionFromIndex(ValLR->end-1); - if (!ValLREndInst || + MachineInstr *ValLREndInst = + li_->getInstructionFromIndex(li_->getPrevSlot(ValLR->end)); + if (!ValLREndInst || ValLREndInst->getParent() != CopyMI->getParent()) return false; // Okay, we now know that ValLR ends in the same block that the CopyMI @@ -177,28 +180,33 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA, *tri_->getSubRegisters(IntB.reg)) { for (const unsigned* SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR) if (li_->hasInterval(*SR) && IntA.overlaps(li_->getInterval(*SR))) { - DOUT << "Interfere with sub-register "; - DEBUG(li_->getInterval(*SR).print(DOUT, tri_)); + DEBUG({ + errs() << "Interfere with sub-register "; + li_->getInterval(*SR).print(errs(), tri_); + }); return false; } } - - DOUT << "\nExtending: "; IntB.print(DOUT, tri_); - - unsigned FillerStart = ValLR->end, FillerEnd = BLR->start; + + DEBUG({ + errs() << "\nExtending: "; + IntB.print(errs(), tri_); + }); + + LiveIndex FillerStart = ValLR->end, FillerEnd = BLR->start; // We are about to delete CopyMI, so need to remove it as the 'instruction // that defines this value #'. Update the the valnum with the new defining // instruction #. BValNo->def = FillerStart; - BValNo->copy = NULL; - + BValNo->setCopy(0); + // Okay, we can merge them. We need to insert a new liverange: // [ValLR.end, BLR.begin) of either value number, then we merge the // two value numbers. 
IntB.addRange(LiveRange(FillerStart, FillerEnd, BValNo)); // If the IntB live range is assigned to a physical register, and if that - // physreg has sub-registers, update their live intervals as well. + // physreg has sub-registers, update their live intervals as well. if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) { for (const unsigned *SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR) { LiveInterval &SRLI = li_->getInterval(*SR); @@ -213,17 +221,26 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA, IntB.addKills(ValLR->valno, BValNo->kills); IntB.MergeValueNumberInto(BValNo, ValLR->valno); } - DOUT << " result = "; IntB.print(DOUT, tri_); - DOUT << "\n"; + DEBUG({ + errs() << " result = "; + IntB.print(errs(), tri_); + errs() << "\n"; + }); // If the source instruction was killing the source register before the // merge, unset the isKill marker given the live range has been extended. int UIdx = ValLREndInst->findRegisterUseOperandIdx(IntB.reg, true); if (UIdx != -1) { ValLREndInst->getOperand(UIdx).setIsKill(false); - IntB.removeKill(ValLR->valno, FillerStart); + ValLR->valno->removeKill(FillerStart); } + // If the copy instruction was killing the destination register before the + // merge, find the last use and trim the live range. That will also add the + // isKill marker. 
+ if (CopyMI->killsRegister(IntA.reg)) + TrimLiveIntervalToLastUse(CopyUseIdx, CopyMI->getParent(), IntA, ALR); + ++numExtends; return true; } @@ -253,6 +270,16 @@ bool SimpleRegisterCoalescing::HasOtherReachingDefs(LiveInterval &IntA, return false; } +static void +TransferImplicitOps(MachineInstr *MI, MachineInstr *NewMI) { + for (unsigned i = MI->getDesc().getNumOperands(), e = MI->getNumOperands(); + i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.isImplicit()) + NewMI->addOperand(MO); + } +} + /// RemoveCopyByCommutingDef - We found a non-trivially-coalescable copy with IntA /// being the source and IntB being the dest, thus this defines a value number /// in IntB. If the source value number (in IntA) is defined by a commutable @@ -279,7 +306,8 @@ bool SimpleRegisterCoalescing::HasOtherReachingDefs(LiveInterval &IntA, bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, LiveInterval &IntB, MachineInstr *CopyMI) { - unsigned CopyIdx = li_->getDefIndex(li_->getInstructionIndex(CopyMI)); + LiveIndex CopyIdx = + li_->getDefIndex(li_->getInstructionIndex(CopyMI)); // FIXME: For now, only eliminate the copy by commuting its def when the // source register is a virtual register. We want to guard against cases @@ -293,15 +321,17 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, LiveInterval::iterator BLR = IntB.FindLiveRangeContaining(CopyIdx); assert(BLR != IntB.end() && "Live range not found!"); VNInfo *BValNo = BLR->valno; - + // Get the location that B is defined at. Two options: either this value has - // an unknown definition point or it is defined at CopyIdx. If unknown, we + // an unknown definition point or it is defined at CopyIdx. If unknown, we // can't process it. 
- if (!BValNo->copy) return false; + if (!BValNo->getCopy()) return false; assert(BValNo->def == CopyIdx && "Copy doesn't define the value?"); - + // AValNo is the value number in A that defines the copy, A3 in the example. - LiveInterval::iterator ALR = IntA.FindLiveRangeContaining(CopyIdx-1); + LiveInterval::iterator ALR = + IntA.FindLiveRangeContaining(li_->getPrevSlot(CopyIdx)); + assert(ALR != IntA.end() && "Live range not found!"); VNInfo *AValNo = ALR->valno; // If other defs can reach uses of this def, then it's not safe to perform @@ -312,9 +342,23 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, return false; MachineInstr *DefMI = li_->getInstructionFromIndex(AValNo->def); const TargetInstrDesc &TID = DefMI->getDesc(); - unsigned NewDstIdx; - if (!TID.isCommutable() || - !tii_->CommuteChangesDestination(DefMI, NewDstIdx)) + if (!TID.isCommutable()) + return false; + // If DefMI is a two-address instruction then commuting it will change the + // destination register. 
+ int DefIdx = DefMI->findRegisterDefOperandIdx(IntA.reg); + assert(DefIdx != -1); + unsigned UseOpIdx; + if (!DefMI->isRegTiedToUseOperand(DefIdx, &UseOpIdx)) + return false; + unsigned Op1, Op2, NewDstIdx; + if (!tii_->findCommutedOpIndices(DefMI, Op1, Op2)) + return false; + if (Op1 == UseOpIdx) + NewDstIdx = Op2; + else if (Op2 == UseOpIdx) + NewDstIdx = Op1; + else return false; MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx); @@ -332,7 +376,7 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(IntA.reg), UE = mri_->use_end(); UI != UE; ++UI) { MachineInstr *UseMI = &*UI; - unsigned UseIdx = li_->getInstructionIndex(UseMI); + LiveIndex UseIdx = li_->getInstructionIndex(UseMI); LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx); if (ULR == IntA.end()) continue; @@ -356,8 +400,8 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, bool BHasPHIKill = BValNo->hasPHIKill(); SmallVector<VNInfo*, 4> BDeadValNos; - SmallVector<unsigned, 4> BKills; - std::map<unsigned, unsigned> BExtend; + VNInfo::KillSet BKills; + std::map<LiveIndex, LiveIndex> BExtend; // If ALR and BLR overlaps and end of BLR extends beyond end of ALR, e.g. 
// A = or A, B @@ -384,7 +428,7 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, ++UI; if (JoinedCopies.count(UseMI)) continue; - unsigned UseIdx = li_->getInstructionIndex(UseMI); + LiveIndex UseIdx= li_->getUseIndex(li_->getInstructionIndex(UseMI)); LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx); if (ULR == IntA.end() || ULR->valno != AValNo) continue; @@ -395,7 +439,7 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, if (Extended) UseMO.setIsKill(false); else - BKills.push_back(li_->getUseIndex(UseIdx)+1); + BKills.push_back(li_->getNextSlot(UseIdx)); } unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; if (!tii_->isMoveInstr(*UseMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) @@ -404,7 +448,7 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, // This copy will become a noop. If it's defining a new val#, // remove that val# as well. However this live range is being // extended to the end of the existing live range defined by the copy. - unsigned DefIdx = li_->getDefIndex(UseIdx); + LiveIndex DefIdx = li_->getDefIndex(UseIdx); const LiveRange *DLR = IntB.getLiveRangeContaining(DefIdx); BHasPHIKill |= DLR->valno->hasPHIKill(); assert(DLR->valno->def == DefIdx); @@ -420,7 +464,10 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, // We need to insert a new liverange: [ALR.start, LastUse). It may be we can // simply extend BLR if CopyMI doesn't end the range. - DOUT << "\nExtending: "; IntB.print(DOUT, tri_); + DEBUG({ + errs() << "\nExtending: "; + IntB.print(errs(), tri_); + }); // Remove val#'s defined by copies that will be coalesced away. for (unsigned i = 0, e = BDeadValNos.size(); i != e; ++i) { @@ -439,24 +486,24 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, // is updated. Kills are also updated. 
VNInfo *ValNo = BValNo; ValNo->def = AValNo->def; - ValNo->copy = NULL; + ValNo->setCopy(0); for (unsigned j = 0, ee = ValNo->kills.size(); j != ee; ++j) { - unsigned Kill = ValNo->kills[j]; - if (Kill != BLR->end) - BKills.push_back(Kill); + if (ValNo->kills[j] != BLR->end) + BKills.push_back(ValNo->kills[j]); } ValNo->kills.clear(); for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end(); AI != AE; ++AI) { if (AI->valno != AValNo) continue; - unsigned End = AI->end; - std::map<unsigned, unsigned>::iterator EI = BExtend.find(End); + LiveIndex End = AI->end; + std::map<LiveIndex, LiveIndex>::iterator + EI = BExtend.find(End); if (EI != BExtend.end()) End = EI->second; IntB.addRange(LiveRange(AI->start, End, ValNo)); // If the IntB live range is assigned to a physical register, and if that - // physreg has sub-registers, update their live intervals as well. + // physreg has sub-registers, update their live intervals as well. if (BHasSubRegs) { for (const unsigned *SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR) { LiveInterval &SRLI = li_->getInterval(*SR); @@ -467,13 +514,21 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, IntB.addKills(ValNo, BKills); ValNo->setHasPHIKill(BHasPHIKill); - DOUT << " result = "; IntB.print(DOUT, tri_); - DOUT << "\n"; + DEBUG({ + errs() << " result = "; + IntB.print(errs(), tri_); + errs() << '\n'; + errs() << "\nShortening: "; + IntA.print(errs(), tri_); + }); - DOUT << "\nShortening: "; IntA.print(DOUT, tri_); IntA.removeValNo(AValNo); - DOUT << " result = "; IntA.print(DOUT, tri_); - DOUT << "\n"; + + DEBUG({ + errs() << " result = "; + IntA.print(errs(), tri_); + errs() << '\n'; + }); ++numCommutes; return true; @@ -495,7 +550,8 @@ static bool isSameOrFallThroughBB(MachineBasicBlock *MBB, /// removeRange - Wrapper for LiveInterval::removeRange. This removes a range /// from a physical register live interval as well as from the live intervals /// of its sub-registers. 
-static void removeRange(LiveInterval &li, unsigned Start, unsigned End, +static void removeRange(LiveInterval &li, + LiveIndex Start, LiveIndex End, LiveIntervals *li_, const TargetRegisterInfo *tri_) { li.removeRange(Start, End, true); if (TargetRegisterInfo::isPhysicalRegister(li.reg)) { @@ -503,14 +559,15 @@ static void removeRange(LiveInterval &li, unsigned Start, unsigned End, if (!li_->hasInterval(*SR)) continue; LiveInterval &sli = li_->getInterval(*SR); - unsigned RemoveEnd = Start; + LiveIndex RemoveStart = Start; + LiveIndex RemoveEnd = Start; while (RemoveEnd != End) { - LiveInterval::iterator LR = sli.FindLiveRangeContaining(Start); + LiveInterval::iterator LR = sli.FindLiveRangeContaining(RemoveStart); if (LR == sli.end()) break; RemoveEnd = (LR->end < End) ? LR->end : End; - sli.removeRange(Start, RemoveEnd, true); - Start = RemoveEnd; + sli.removeRange(RemoveStart, RemoveEnd, true); + RemoveStart = RemoveEnd; } } } @@ -520,14 +577,14 @@ static void removeRange(LiveInterval &li, unsigned Start, unsigned End, /// as the copy instruction, trim the live interval to the last use and return /// true. bool -SimpleRegisterCoalescing::TrimLiveIntervalToLastUse(unsigned CopyIdx, +SimpleRegisterCoalescing::TrimLiveIntervalToLastUse(LiveIndex CopyIdx, MachineBasicBlock *CopyMBB, LiveInterval &li, const LiveRange *LR) { - unsigned MBBStart = li_->getMBBStartIdx(CopyMBB); - unsigned LastUseIdx; - MachineOperand *LastUse = lastRegisterUse(LR->start, CopyIdx-1, li.reg, - LastUseIdx); + LiveIndex MBBStart = li_->getMBBStartIdx(CopyMBB); + LiveIndex LastUseIdx; + MachineOperand *LastUse = + lastRegisterUse(LR->start, li_->getPrevSlot(CopyIdx), li.reg, LastUseIdx); if (LastUse) { MachineInstr *LastUseMI = LastUse->getParent(); if (!isSameOrFallThroughBB(LastUseMI->getParent(), CopyMBB, tii_)) { @@ -547,7 +604,7 @@ SimpleRegisterCoalescing::TrimLiveIntervalToLastUse(unsigned CopyIdx, // of last use. 
LastUse->setIsKill(); removeRange(li, li_->getDefIndex(LastUseIdx), LR->end, li_, tri_); - li.addKill(LR->valno, LastUseIdx+1); + LR->valno->addKill(li_->getNextSlot(LastUseIdx)); unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; if (tii_->isMoveInstr(*LastUseMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) && DstReg == li.reg) { @@ -560,7 +617,7 @@ SimpleRegisterCoalescing::TrimLiveIntervalToLastUse(unsigned CopyIdx, // Is it livein? if (LR->start <= MBBStart && LR->end > MBBStart) { - if (LR->start == 0) { + if (LR->start == LiveIndex()) { assert(TargetRegisterInfo::isPhysicalRegister(li.reg)); // Live-in to the function but dead. Remove it from entry live-in set. mf_->begin()->removeLiveIn(li.reg); @@ -575,8 +632,9 @@ SimpleRegisterCoalescing::TrimLiveIntervalToLastUse(unsigned CopyIdx, /// computation, replace the copy by rematerialize the definition. bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt, unsigned DstReg, + unsigned DstSubIdx, MachineInstr *CopyMI) { - unsigned CopyIdx = li_->getUseIndex(li_->getInstructionIndex(CopyMI)); + LiveIndex CopyIdx = li_->getUseIndex(li_->getInstructionIndex(CopyMI)); LiveInterval::iterator SrcLR = SrcInt.FindLiveRangeContaining(CopyIdx); assert(SrcLR != SrcInt.end() && "Live range not found!"); VNInfo *ValNo = SrcLR->valno; @@ -590,24 +648,52 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt, const TargetInstrDesc &TID = DefMI->getDesc(); if (!TID.isAsCheapAsAMove()) return false; - if (!DefMI->getDesc().isRematerializable() || - !tii_->isTriviallyReMaterializable(DefMI)) + if (!tii_->isTriviallyReMaterializable(DefMI, AA)) return false; bool SawStore = false; - if (!DefMI->isSafeToMove(tii_, SawStore)) + if (!DefMI->isSafeToMove(tii_, SawStore, AA)) + return false; + if (TID.getNumDefs() != 1) return false; + if (DefMI->getOpcode() != TargetInstrInfo::IMPLICIT_DEF) { + // Make sure the copy destination register class fits the instruction + // definition register class. 
The mismatch can happen as a result of earlier + // extract_subreg, insert_subreg, subreg_to_reg coalescing. + const TargetRegisterClass *RC = TID.OpInfo[0].getRegClass(tri_); + if (TargetRegisterInfo::isVirtualRegister(DstReg)) { + if (mri_->getRegClass(DstReg) != RC) + return false; + } else if (!RC->contains(DstReg)) + return false; + } - unsigned DefIdx = li_->getDefIndex(CopyIdx); + // If destination register has a sub-register index on it, make sure it mtches + // the instruction register class. + if (DstSubIdx) { + const TargetInstrDesc &TID = DefMI->getDesc(); + if (TID.getNumDefs() != 1) + return false; + const TargetRegisterClass *DstRC = mri_->getRegClass(DstReg); + const TargetRegisterClass *DstSubRC = + DstRC->getSubRegisterRegClass(DstSubIdx); + const TargetRegisterClass *DefRC = TID.OpInfo[0].getRegClass(tri_); + if (DefRC == DstRC) + DstSubIdx = 0; + else if (DefRC != DstSubRC) + return false; + } + + LiveIndex DefIdx = li_->getDefIndex(CopyIdx); const LiveRange *DLR= li_->getInterval(DstReg).getLiveRangeContaining(DefIdx); - DLR->valno->copy = NULL; + DLR->valno->setCopy(0); // Don't forget to update sub-register intervals. 
if (TargetRegisterInfo::isPhysicalRegister(DstReg)) { for (const unsigned* SR = tri_->getSubRegisters(DstReg); *SR; ++SR) { if (!li_->hasInterval(*SR)) continue; DLR = li_->getInterval(*SR).getLiveRangeContaining(DefIdx); - if (DLR && DLR->valno->copy == CopyMI) - DLR->valno->copy = NULL; + if (DLR && DLR->valno->getCopy() == CopyMI) + DLR->valno->setCopy(0); } } @@ -621,7 +707,7 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt, } MachineBasicBlock::iterator MII = next(MachineBasicBlock::iterator(CopyMI)); - tii_->reMaterialize(*MBB, MII, DstReg, DefMI); + tii_->reMaterialize(*MBB, MII, DstReg, DstSubIdx, DefMI); MachineInstr *NewMI = prior(MII); if (checkForDeadDef) { @@ -630,7 +716,7 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt, // should mark it dead: if (DefMI->getParent() == MBB) { DefMI->addRegisterDead(SrcInt.reg, tri_); - SrcLR->end = SrcLR->start + 1; + SrcLR->end = li_->getNextSlot(SrcLR->start); } } @@ -644,11 +730,12 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt, if (MO.isDef() && li_->hasInterval(MO.getReg())) { unsigned Reg = MO.getReg(); DLR = li_->getInterval(Reg).getLiveRangeContaining(DefIdx); - if (DLR && DLR->valno->copy == CopyMI) - DLR->valno->copy = NULL; + if (DLR && DLR->valno->getCopy() == CopyMI) + DLR->valno->setCopy(0); } } + TransferImplicitOps(CopyMI, NewMI); li_->ReplaceMachineInstrInMaps(CopyMI, NewMI); CopyMI->eraseFromParent(); ReMatCopies.insert(CopyMI); @@ -657,30 +744,6 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt, return true; } -/// isBackEdgeCopy - Returns true if CopyMI is a back edge copy. 
-/// -bool SimpleRegisterCoalescing::isBackEdgeCopy(MachineInstr *CopyMI, - unsigned DstReg) const { - MachineBasicBlock *MBB = CopyMI->getParent(); - const MachineLoop *L = loopInfo->getLoopFor(MBB); - if (!L) - return false; - if (MBB != L->getLoopLatch()) - return false; - - LiveInterval &LI = li_->getInterval(DstReg); - unsigned DefIdx = li_->getInstructionIndex(CopyMI); - LiveInterval::const_iterator DstLR = - LI.FindLiveRangeContaining(li_->getDefIndex(DefIdx)); - if (DstLR == LI.end()) - return false; - unsigned KillIdx = li_->getMBBEndIdx(MBB) + 1; - if (DstLR->valno->kills.size() == 1 && - DstLR->valno->kills[0] == KillIdx && DstLR->valno->hasPHIKill()) - return true; - return false; -} - /// UpdateRegDefsUses - Replace all defs and uses of SrcReg to DstReg and /// update the subregister number if it is not zero. If DstReg is a /// physical register and the existing subregister number of the def / use @@ -714,7 +777,8 @@ SimpleRegisterCoalescing::UpdateRegDefsUses(unsigned SrcReg, unsigned DstReg, CopySrcReg == SrcReg && CopyDstReg != UseDstReg) { // If the use is a copy and it won't be coalesced away, and its source // is defined by a trivial computation, try to rematerialize it instead. 
- if (ReMaterializeTrivialDef(li_->getInterval(SrcReg), CopyDstReg,UseMI)) + if (ReMaterializeTrivialDef(li_->getInterval(SrcReg), CopyDstReg, + CopyDstSubIdx, UseMI)) continue; } @@ -751,44 +815,16 @@ SimpleRegisterCoalescing::UpdateRegDefsUses(unsigned SrcReg, unsigned DstReg, (TargetRegisterInfo::isVirtualRegister(CopyDstReg) || allocatableRegs_[CopyDstReg])) { LiveInterval &LI = li_->getInterval(CopyDstReg); - unsigned DefIdx = li_->getDefIndex(li_->getInstructionIndex(UseMI)); + LiveIndex DefIdx = + li_->getDefIndex(li_->getInstructionIndex(UseMI)); if (const LiveRange *DLR = LI.getLiveRangeContaining(DefIdx)) { if (DLR->valno->def == DefIdx) - DLR->valno->copy = UseMI; + DLR->valno->setCopy(UseMI); } } } } -/// RemoveDeadImpDef - Remove implicit_def instructions which are "re-defining" -/// registers due to insert_subreg coalescing. e.g. -/// r1024 = op -/// r1025 = implicit_def -/// r1025 = insert_subreg r1025, r1024 -/// = op r1025 -/// => -/// r1025 = op -/// r1025 = implicit_def -/// r1025 = insert_subreg r1025, r1025 -/// = op r1025 -void -SimpleRegisterCoalescing::RemoveDeadImpDef(unsigned Reg, LiveInterval &LI) { - for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(Reg), - E = mri_->reg_end(); I != E; ) { - MachineOperand &O = I.getOperand(); - MachineInstr *DefMI = &*I; - ++I; - if (!O.isDef()) - continue; - if (DefMI->getOpcode() != TargetInstrInfo::IMPLICIT_DEF) - continue; - if (!LI.liveBeforeAndAt(li_->getInstructionIndex(DefMI))) - continue; - li_->RemoveMachineInstrFromMaps(DefMI); - DefMI->eraseFromParent(); - } -} - /// RemoveUnnecessaryKills - Remove kill markers that are no longer accurate /// due to live range lengthening as the result of coalescing. 
void SimpleRegisterCoalescing::RemoveUnnecessaryKills(unsigned Reg, @@ -796,12 +832,27 @@ void SimpleRegisterCoalescing::RemoveUnnecessaryKills(unsigned Reg, for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(Reg), UE = mri_->use_end(); UI != UE; ++UI) { MachineOperand &UseMO = UI.getOperand(); - if (UseMO.isKill()) { - MachineInstr *UseMI = UseMO.getParent(); - unsigned UseIdx = li_->getUseIndex(li_->getInstructionIndex(UseMI)); - const LiveRange *UI = LI.getLiveRangeContaining(UseIdx); - if (!UI || !LI.isKill(UI->valno, UseIdx+1)) - UseMO.setIsKill(false); + if (!UseMO.isKill()) + continue; + MachineInstr *UseMI = UseMO.getParent(); + LiveIndex UseIdx = + li_->getUseIndex(li_->getInstructionIndex(UseMI)); + const LiveRange *LR = LI.getLiveRangeContaining(UseIdx); + if (!LR || + (!LR->valno->isKill(li_->getNextSlot(UseIdx)) && + LR->valno->def != li_->getNextSlot(UseIdx))) { + // Interesting problem. After coalescing reg1027's def and kill are both + // at the same point: %reg1027,0.000000e+00 = [56,814:0) 0@70-(814) + // + // bb5: + // 60 %reg1027<def> = t2MOVr %reg1027, 14, %reg0, %reg0 + // 68 %reg1027<def> = t2LDRi12 %reg1027<kill>, 8, 14, %reg0 + // 76 t2CMPzri %reg1038<kill,undef>, 0, 14, %reg0, %CPSR<imp-def> + // 84 %reg1027<def> = t2MOVr %reg1027, 14, %reg0, %reg0 + // 96 t2Bcc mbb<bb5,0x2030910>, 1, %CPSR<kill> + // + // Do not remove the kill marker on t2LDRi12. + UseMO.setIsKill(false); } } } @@ -830,15 +881,16 @@ static bool removeIntervalIfEmpty(LiveInterval &li, LiveIntervals *li_, /// Return true if live interval is removed. bool SimpleRegisterCoalescing::ShortenDeadCopyLiveRange(LiveInterval &li, MachineInstr *CopyMI) { - unsigned CopyIdx = li_->getInstructionIndex(CopyMI); + LiveIndex CopyIdx = li_->getInstructionIndex(CopyMI); LiveInterval::iterator MLR = li.FindLiveRangeContaining(li_->getDefIndex(CopyIdx)); if (MLR == li.end()) return false; // Already removed by ShortenDeadCopySrcLiveRange. 
- unsigned RemoveStart = MLR->start; - unsigned RemoveEnd = MLR->end; + LiveIndex RemoveStart = MLR->start; + LiveIndex RemoveEnd = MLR->end; + LiveIndex DefIdx = li_->getDefIndex(CopyIdx); // Remove the liverange that's defined by this. - if (RemoveEnd == li_->getDefIndex(CopyIdx)+1) { + if (RemoveStart == DefIdx && RemoveEnd == li_->getNextSlot(DefIdx)) { removeRange(li, RemoveStart, RemoveEnd, li_, tri_); return removeIntervalIfEmpty(li, li_, tri_); } @@ -849,7 +901,7 @@ bool SimpleRegisterCoalescing::ShortenDeadCopyLiveRange(LiveInterval &li, /// the val# it defines. If the live interval becomes empty, remove it as well. bool SimpleRegisterCoalescing::RemoveDeadDef(LiveInterval &li, MachineInstr *DefMI) { - unsigned DefIdx = li_->getDefIndex(li_->getInstructionIndex(DefMI)); + LiveIndex DefIdx = li_->getDefIndex(li_->getInstructionIndex(DefMI)); LiveInterval::iterator MLR = li.FindLiveRangeContaining(DefIdx); if (DefIdx != MLR->valno->def) return false; @@ -860,17 +912,18 @@ bool SimpleRegisterCoalescing::RemoveDeadDef(LiveInterval &li, /// PropagateDeadness - Propagate the dead marker to the instruction which /// defines the val#. static void PropagateDeadness(LiveInterval &li, MachineInstr *CopyMI, - unsigned &LRStart, LiveIntervals *li_, + LiveIndex &LRStart, LiveIntervals *li_, const TargetRegisterInfo* tri_) { MachineInstr *DefMI = li_->getInstructionFromIndex(li_->getDefIndex(LRStart)); if (DefMI && DefMI != CopyMI) { - int DeadIdx = DefMI->findRegisterDefOperandIdx(li.reg, false, tri_); - if (DeadIdx != -1) { + int DeadIdx = DefMI->findRegisterDefOperandIdx(li.reg, false); + if (DeadIdx != -1) DefMI->getOperand(DeadIdx).setIsDead(); - // A dead def should have a single cycle interval. 
- ++LRStart; - } + else + DefMI->addOperand(MachineOperand::CreateReg(li.reg, + true, true, false, true)); + LRStart = li_->getNextSlot(LRStart); } } @@ -881,8 +934,8 @@ static void PropagateDeadness(LiveInterval &li, MachineInstr *CopyMI, bool SimpleRegisterCoalescing::ShortenDeadCopySrcLiveRange(LiveInterval &li, MachineInstr *CopyMI) { - unsigned CopyIdx = li_->getInstructionIndex(CopyMI); - if (CopyIdx == 0) { + LiveIndex CopyIdx = li_->getInstructionIndex(CopyMI); + if (CopyIdx == LiveIndex()) { // FIXME: special case: function live in. It can be a general case if the // first instruction index starts at > 0 value. assert(TargetRegisterInfo::isPhysicalRegister(li.reg)); @@ -894,13 +947,14 @@ SimpleRegisterCoalescing::ShortenDeadCopySrcLiveRange(LiveInterval &li, return removeIntervalIfEmpty(li, li_, tri_); } - LiveInterval::iterator LR = li.FindLiveRangeContaining(CopyIdx-1); + LiveInterval::iterator LR = + li.FindLiveRangeContaining(li_->getPrevSlot(CopyIdx)); if (LR == li.end()) // Livein but defined by a phi. return false; - unsigned RemoveStart = LR->start; - unsigned RemoveEnd = li_->getDefIndex(CopyIdx)+1; + LiveIndex RemoveStart = LR->start; + LiveIndex RemoveEnd = li_->getNextSlot(li_->getDefIndex(CopyIdx)); if (LR->end > RemoveEnd) // More uses past this copy? Nothing to do. return false; @@ -911,22 +965,25 @@ SimpleRegisterCoalescing::ShortenDeadCopySrcLiveRange(LiveInterval &li, if (TrimLiveIntervalToLastUse(CopyIdx, CopyMBB, li, LR)) return false; + // There are other kills of the val#. Nothing to do. + if (!li.isOnlyLROfValNo(LR)) + return false; + MachineBasicBlock *StartMBB = li_->getMBBFromIndex(RemoveStart); if (!isSameOrFallThroughBB(StartMBB, CopyMBB, tii_)) // If the live range starts in another mbb and the copy mbb is not a fall // through mbb, then we can only cut the range from the beginning of the // copy mbb. 
- RemoveStart = li_->getMBBStartIdx(CopyMBB) + 1; + RemoveStart = li_->getNextSlot(li_->getMBBStartIdx(CopyMBB)); if (LR->valno->def == RemoveStart) { // If the def MI defines the val# and this copy is the only kill of the // val#, then propagate the dead marker. - if (li.isOnlyLROfValNo(LR)) { - PropagateDeadness(li, CopyMI, RemoveStart, li_, tri_); - ++numDeadValNo; - } - if (li.isKill(LR->valno, RemoveEnd)) - li.removeKill(LR->valno, RemoveEnd); + PropagateDeadness(li, CopyMI, RemoveStart, li_, tri_); + ++numDeadValNo; + + if (LR->valno->isKill(RemoveEnd)) + LR->valno->removeKill(RemoveEnd); } removeRange(li, RemoveStart, RemoveEnd, li_, tri_); @@ -940,97 +997,19 @@ bool SimpleRegisterCoalescing::CanCoalesceWithImpDef(MachineInstr *CopyMI, LiveInterval &ImpLi) const{ if (!CopyMI->killsRegister(ImpLi.reg)) return false; - unsigned CopyIdx = li_->getDefIndex(li_->getInstructionIndex(CopyMI)); - LiveInterval::iterator LR = li.FindLiveRangeContaining(CopyIdx); - if (LR == li.end()) - return false; - if (LR->valno->hasPHIKill()) - return false; - if (LR->valno->def != CopyIdx) - return false; - // Make sure all of val# uses are copies. - for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(li.reg), + // Make sure this is the only use. + for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(ImpLi.reg), UE = mri_->use_end(); UI != UE;) { MachineInstr *UseMI = &*UI; ++UI; - if (JoinedCopies.count(UseMI)) - continue; - unsigned UseIdx = li_->getUseIndex(li_->getInstructionIndex(UseMI)); - LiveInterval::iterator ULR = li.FindLiveRangeContaining(UseIdx); - if (ULR == li.end() || ULR->valno != LR->valno) + if (CopyMI == UseMI || JoinedCopies.count(UseMI)) continue; - // If the use is not a use, then it's not safe to coalesce the move. 
- unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; - if (!tii_->isMoveInstr(*UseMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) { - if (UseMI->getOpcode() == TargetInstrInfo::INSERT_SUBREG && - UseMI->getOperand(1).getReg() == li.reg) - continue; - return false; - } + return false; } return true; } -/// TurnCopiesFromValNoToImpDefs - The specified value# is defined by an -/// implicit_def and it is being removed. Turn all copies from this value# -/// into implicit_defs. -void SimpleRegisterCoalescing::TurnCopiesFromValNoToImpDefs(LiveInterval &li, - VNInfo *VNI) { - SmallVector<MachineInstr*, 4> ImpDefs; - MachineOperand *LastUse = NULL; - unsigned LastUseIdx = li_->getUseIndex(VNI->def); - for (MachineRegisterInfo::reg_iterator RI = mri_->reg_begin(li.reg), - RE = mri_->reg_end(); RI != RE;) { - MachineOperand *MO = &RI.getOperand(); - MachineInstr *MI = &*RI; - ++RI; - if (MO->isDef()) { - if (MI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF) - ImpDefs.push_back(MI); - continue; - } - if (JoinedCopies.count(MI)) - continue; - unsigned UseIdx = li_->getUseIndex(li_->getInstructionIndex(MI)); - LiveInterval::iterator ULR = li.FindLiveRangeContaining(UseIdx); - if (ULR == li.end() || ULR->valno != VNI) - continue; - // If the use is a copy, turn it into an identity copy. - unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; - if (tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) && - SrcReg == li.reg) { - // Change it to an implicit_def. - MI->setDesc(tii_->get(TargetInstrInfo::IMPLICIT_DEF)); - for (int i = MI->getNumOperands() - 1, e = 0; i > e; --i) - MI->RemoveOperand(i); - // It's no longer a copy, update the valno it defines. 
- unsigned DefIdx = li_->getDefIndex(UseIdx); - LiveInterval &DstInt = li_->getInterval(DstReg); - LiveInterval::iterator DLR = DstInt.FindLiveRangeContaining(DefIdx); - assert(DLR != DstInt.end() && "Live range not found!"); - assert(DLR->valno->copy == MI); - DLR->valno->copy = NULL; - ReMatCopies.insert(MI); - } else if (UseIdx > LastUseIdx) { - LastUseIdx = UseIdx; - LastUse = MO; - } - } - if (LastUse) { - LastUse->setIsKill(); - li.addKill(VNI, LastUseIdx+1); - } else { - // Remove dead implicit_def's. - while (!ImpDefs.empty()) { - MachineInstr *ImpDef = ImpDefs.back(); - ImpDefs.pop_back(); - li_->RemoveMachineInstrFromMaps(ImpDef); - ImpDef->eraseFromParent(); - } - } -} - /// isWinToJoinVRWithSrcPhysReg - Return true if it's worth while to join a /// a virtual destination register with physical source register. bool @@ -1051,13 +1030,14 @@ SimpleRegisterCoalescing::isWinToJoinVRWithSrcPhysReg(MachineInstr *CopyMI, // If the virtual register live interval extends into a loop, turn down // aggressiveness. - unsigned CopyIdx = li_->getDefIndex(li_->getInstructionIndex(CopyMI)); + LiveIndex CopyIdx = + li_->getDefIndex(li_->getInstructionIndex(CopyMI)); const MachineLoop *L = loopInfo->getLoopFor(CopyMBB); if (!L) { // Let's see if the virtual register live interval extends into the loop. 
LiveInterval::iterator DLR = DstInt.FindLiveRangeContaining(CopyIdx); assert(DLR != DstInt.end() && "Live range not found!"); - DLR = DstInt.FindLiveRangeContaining(DLR->end+1); + DLR = DstInt.FindLiveRangeContaining(li_->getNextSlot(DLR->end)); if (DLR != DstInt.end()) { CopyMBB = li_->getMBBFromIndex(DLR->start); L = loopInfo->getLoopFor(CopyMBB); @@ -1067,7 +1047,7 @@ SimpleRegisterCoalescing::isWinToJoinVRWithSrcPhysReg(MachineInstr *CopyMI, if (!L || Length <= Threshold) return true; - unsigned UseIdx = li_->getUseIndex(CopyIdx); + LiveIndex UseIdx = li_->getUseIndex(CopyIdx); LiveInterval::iterator SLR = SrcInt.FindLiveRangeContaining(UseIdx); MachineBasicBlock *SMBB = li_->getMBBFromIndex(SLR->start); if (loopInfo->getLoopFor(SMBB) != L) { @@ -1080,7 +1060,7 @@ SimpleRegisterCoalescing::isWinToJoinVRWithSrcPhysReg(MachineInstr *CopyMI, if (SuccMBB == CopyMBB) continue; if (DstInt.overlaps(li_->getMBBStartIdx(SuccMBB), - li_->getMBBEndIdx(SuccMBB)+1)) + li_->getNextSlot(li_->getMBBEndIdx(SuccMBB)))) return false; } } @@ -1111,11 +1091,12 @@ SimpleRegisterCoalescing::isWinToJoinVRWithDstPhysReg(MachineInstr *CopyMI, // If the virtual register live interval is defined or cross a loop, turn // down aggressiveness. 
- unsigned CopyIdx = li_->getDefIndex(li_->getInstructionIndex(CopyMI)); - unsigned UseIdx = li_->getUseIndex(CopyIdx); + LiveIndex CopyIdx = + li_->getDefIndex(li_->getInstructionIndex(CopyMI)); + LiveIndex UseIdx = li_->getUseIndex(CopyIdx); LiveInterval::iterator SLR = SrcInt.FindLiveRangeContaining(UseIdx); assert(SLR != SrcInt.end() && "Live range not found!"); - SLR = SrcInt.FindLiveRangeContaining(SLR->start-1); + SLR = SrcInt.FindLiveRangeContaining(li_->getPrevSlot(SLR->start)); if (SLR == SrcInt.end()) return true; MachineBasicBlock *SMBB = li_->getMBBFromIndex(SLR->start); @@ -1135,7 +1116,7 @@ SimpleRegisterCoalescing::isWinToJoinVRWithDstPhysReg(MachineInstr *CopyMI, if (PredMBB == SMBB) continue; if (SrcInt.overlaps(li_->getMBBStartIdx(PredMBB), - li_->getMBBEndIdx(PredMBB)+1)) + li_->getNextSlot(li_->getMBBEndIdx(PredMBB)))) return false; } } @@ -1236,14 +1217,18 @@ SimpleRegisterCoalescing::CanJoinExtractSubRegToPhysReg(unsigned DstReg, LiveInterval &RHS = li_->getInterval(SrcReg); if (li_->hasInterval(RealDstReg) && RHS.overlaps(li_->getInterval(RealDstReg))) { - DOUT << "Interfere with register "; - DEBUG(li_->getInterval(RealDstReg).print(DOUT, tri_)); + DEBUG({ + errs() << "Interfere with register "; + li_->getInterval(RealDstReg).print(errs(), tri_); + }); return false; // Not coalescable } for (const unsigned* SR = tri_->getSubRegisters(RealDstReg); *SR; ++SR) if (li_->hasInterval(*SR) && RHS.overlaps(li_->getInterval(*SR))) { - DOUT << "Interfere with sub-register "; - DEBUG(li_->getInterval(*SR).print(DOUT, tri_)); + DEBUG({ + errs() << "Interfere with sub-register "; + li_->getInterval(*SR).print(errs(), tri_); + }); return false; // Not coalescable } return true; @@ -1263,14 +1248,18 @@ SimpleRegisterCoalescing::CanJoinInsertSubRegToPhysReg(unsigned DstReg, LiveInterval &RHS = li_->getInterval(DstReg); if (li_->hasInterval(RealSrcReg) && RHS.overlaps(li_->getInterval(RealSrcReg))) { - DOUT << "Interfere with register "; - 
DEBUG(li_->getInterval(RealSrcReg).print(DOUT, tri_)); + DEBUG({ + errs() << "Interfere with register "; + li_->getInterval(RealSrcReg).print(errs(), tri_); + }); return false; // Not coalescable } for (const unsigned* SR = tri_->getSubRegisters(RealSrcReg); *SR; ++SR) if (li_->hasInterval(*SR) && RHS.overlaps(li_->getInterval(*SR))) { - DOUT << "Interfere with sub-register "; - DEBUG(li_->getInterval(*SR).print(DOUT, tri_)); + DEBUG({ + errs() << "Interfere with sub-register "; + li_->getInterval(*SR).print(errs(), tri_); + }); return false; // Not coalescable } return true; @@ -1299,7 +1288,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { if (JoinedCopies.count(CopyMI) || ReMatCopies.count(CopyMI)) return false; // Already done. - DOUT << li_->getInstructionIndex(CopyMI) << '\t' << *CopyMI; + DEBUG(errs() << li_->getInstructionIndex(CopyMI) << '\t' << *CopyMI); unsigned SrcReg, DstReg, SrcSubIdx = 0, DstSubIdx = 0; bool isExtSubReg = CopyMI->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG; @@ -1312,41 +1301,43 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { SrcReg = CopyMI->getOperand(1).getReg(); SrcSubIdx = CopyMI->getOperand(2).getImm(); } else if (isInsSubReg || isSubRegToReg) { - if (CopyMI->getOperand(2).getSubReg()) { - DOUT << "\tSource of insert_subreg is already coalesced " - << "to another register.\n"; - return false; // Not coalescable. - } DstReg = CopyMI->getOperand(0).getReg(); DstSubIdx = CopyMI->getOperand(3).getImm(); SrcReg = CopyMI->getOperand(2).getReg(); + SrcSubIdx = CopyMI->getOperand(2).getSubReg(); + if (SrcSubIdx && SrcSubIdx != DstSubIdx) { + // r1025 = INSERT_SUBREG r1025, r1024<2>, 2 Then r1024 has already been + // coalesced to a larger register so the subreg indices cancel out. + DEBUG(errs() << "\tSource of insert_subreg or subreg_to_reg is already " + "coalesced to another register.\n"); + return false; // Not coalescable. 
+ } } else if (!tii_->isMoveInstr(*CopyMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)){ - assert(0 && "Unrecognized copy instruction!"); - return false; + llvm_unreachable("Unrecognized copy instruction!"); } // If they are already joined we continue. if (SrcReg == DstReg) { - DOUT << "\tCopy already coalesced.\n"; + DEBUG(errs() << "\tCopy already coalesced.\n"); return false; // Not coalescable. } - + bool SrcIsPhys = TargetRegisterInfo::isPhysicalRegister(SrcReg); bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg); // If they are both physical registers, we cannot join them. if (SrcIsPhys && DstIsPhys) { - DOUT << "\tCan not coalesce physregs.\n"; + DEBUG(errs() << "\tCan not coalesce physregs.\n"); return false; // Not coalescable. } - + // We only join virtual registers with allocatable physical registers. if (SrcIsPhys && !allocatableRegs_[SrcReg]) { - DOUT << "\tSrc reg is unallocatable physreg.\n"; + DEBUG(errs() << "\tSrc reg is unallocatable physreg.\n"); return false; // Not coalescable. } if (DstIsPhys && !allocatableRegs_[DstReg]) { - DOUT << "\tDst reg is unallocatable physreg.\n"; + DEBUG(errs() << "\tDst reg is unallocatable physreg.\n"); return false; // Not coalescable. } @@ -1360,9 +1351,9 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { DstSubRC = DstRC->getSubRegisterRegClass(DstSubIdx); assert(DstSubRC && "Illegal subregister index"); if (!DstSubRC->contains(SrcSubReg)) { - DOUT << "\tIncompatible destination regclass: " - << tri_->getName(SrcSubReg) << " not in " << DstSubRC->getName() - << ".\n"; + DEBUG(errs() << "\tIncompatible destination regclass: " + << tri_->getName(SrcSubReg) << " not in " + << DstSubRC->getName() << ".\n"); return false; // Not coalescable. 
} } @@ -1377,15 +1368,18 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { SrcSubRC = SrcRC->getSubRegisterRegClass(SrcSubIdx); assert(SrcSubRC && "Illegal subregister index"); if (!SrcSubRC->contains(DstReg)) { - DOUT << "\tIncompatible source regclass: " - << tri_->getName(DstSubReg) << " not in " << SrcSubRC->getName() - << ".\n"; + DEBUG(errs() << "\tIncompatible source regclass: " + << tri_->getName(DstSubReg) << " not in " + << SrcSubRC->getName() << ".\n"); + (void)DstSubReg; return false; // Not coalescable. } } // Should be non-null only when coalescing to a sub-register class. bool CrossRC = false; + const TargetRegisterClass *SrcRC= SrcIsPhys ? 0 : mri_->getRegClass(SrcReg); + const TargetRegisterClass *DstRC= DstIsPhys ? 0 : mri_->getRegClass(DstReg); const TargetRegisterClass *NewRC = NULL; MachineBasicBlock *CopyMBB = CopyMI->getParent(); unsigned RealDstReg = 0; @@ -1400,7 +1394,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { // r1024<2> = EXTRACT_SUBREG EAX, 2. Then r1024 has already been // coalesced to a larger register so the subreg indices cancel out. if (DstSubIdx != SubIdx) { - DOUT << "\t Sub-register indices mismatch.\n"; + DEBUG(errs() << "\t Sub-register indices mismatch.\n"); return false; // Not coalescable. } } else @@ -1413,7 +1407,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { // EAX = INSERT_SUBREG EAX, r1024<2>, 2 Then r1024 has already been // coalesced to a larger register so the subreg indices cancel out. if (SrcSubIdx != SubIdx) { - DOUT << "\t Sub-register indices mismatch.\n"; + DEBUG(errs() << "\t Sub-register indices mismatch.\n"); return false; // Not coalescable. 
} } else @@ -1422,8 +1416,8 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { } else if ((DstIsPhys && isExtSubReg) || (SrcIsPhys && (isInsSubReg || isSubRegToReg))) { if (!isSubRegToReg && CopyMI->getOperand(1).getSubReg()) { - DOUT << "\tSrc of extract_subreg already coalesced with reg" - << " of a super-class.\n"; + DEBUG(errs() << "\tSrc of extract_subreg already coalesced with reg" + << " of a super-class.\n"); return false; // Not coalescable. } @@ -1446,11 +1440,22 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { // class as the would be resulting register. SubIdx = 0; else { - DOUT << "\t Sub-register indices mismatch.\n"; + DEBUG(errs() << "\t Sub-register indices mismatch.\n"); return false; // Not coalescable. } } if (SubIdx) { + if (!DstIsPhys && !SrcIsPhys) { + if (isInsSubReg || isSubRegToReg) { + NewRC = tri_->getMatchingSuperRegClass(DstRC, SrcRC, SubIdx); + } else // extract_subreg { + NewRC = tri_->getMatchingSuperRegClass(SrcRC, DstRC, SubIdx); + } + if (!NewRC) { + DEBUG(errs() << "\t Conflicting sub-register indices.\n"); + return false; // Not coalescable + } + unsigned LargeReg = isExtSubReg ? SrcReg : DstReg; unsigned SmallReg = isExtSubReg ? DstReg : SrcReg; unsigned Limit= allocatableRCRegs_[mri_->getRegClass(SmallReg)].count(); @@ -1461,7 +1466,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { } } } else if (differingRegisterClasses(SrcReg, DstReg)) { - if (!CrossClassJoin) + if (DisableCrossClassJoin) return false; CrossRC = true; @@ -1502,11 +1507,8 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { } } - const TargetRegisterClass *SrcRC= SrcIsPhys ? 0 : mri_->getRegClass(SrcReg); - const TargetRegisterClass *DstRC= DstIsPhys ? 0 : mri_->getRegClass(DstReg); unsigned LargeReg = SrcReg; unsigned SmallReg = DstReg; - unsigned Limit = 0; // Now determine the register class of the joined register. 
if (isExtSubReg) { @@ -1517,13 +1519,14 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { Again = true; return false; } - Limit = allocatableRCRegs_[DstRC].count(); + if (!DstIsPhys && !SrcIsPhys) + NewRC = SrcRC; } else if (!SrcIsPhys && !DstIsPhys) { NewRC = getCommonSubClass(SrcRC, DstRC); if (!NewRC) { - DOUT << "\tDisjoint regclasses: " - << SrcRC->getName() << ", " - << DstRC->getName() << ".\n"; + DEBUG(errs() << "\tDisjoint regclasses: " + << SrcRC->getName() << ", " + << DstRC->getName() << ".\n"); return false; // Not coalescable. } if (DstRC->getSize() > SrcRC->getSize()) @@ -1537,7 +1540,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { (isExtSubReg || DstRC->isASubClass()) && !isWinToJoinCrossClass(LargeReg, SmallReg, allocatableRCRegs_[NewRC].count())) { - DOUT << "\tSrc/Dest are different register classes.\n"; + DEBUG(errs() << "\tSrc/Dest are different register classes.\n"); // Allow the coalescer to try again in case either side gets coalesced to // a physical register that's compatible with the other side. e.g. // r1024 = MOV32to32_ r1025 @@ -1552,15 +1555,17 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { return false; if (DstIsPhys && HasIncompatibleSubRegDefUse(CopyMI, SrcReg, DstReg)) return false; - + LiveInterval &SrcInt = li_->getInterval(SrcReg); LiveInterval &DstInt = li_->getInterval(DstReg); assert(SrcInt.reg == SrcReg && DstInt.reg == DstReg && "Register mapping is horribly broken!"); - DOUT << "\t\tInspecting "; SrcInt.print(DOUT, tri_); - DOUT << " and "; DstInt.print(DOUT, tri_); - DOUT << ": "; + DEBUG({ + errs() << "\t\tInspecting "; SrcInt.print(errs(), tri_); + errs() << " and "; DstInt.print(errs(), tri_); + errs() << ": "; + }); // Save a copy of the virtual register live interval. 
We'll manually // merge this into the "real" physical register live interval this is @@ -1590,7 +1595,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { if (!isWinToJoinVRWithSrcPhysReg(CopyMI, CopyMBB, DstInt, SrcInt)) { mri_->setRegAllocationHint(DstInt.reg, 0, SrcReg); ++numAborts; - DOUT << "\tMay tie down a physical register, abort!\n"; + DEBUG(errs() << "\tMay tie down a physical register, abort!\n"); Again = true; // May be possible to coalesce later. return false; } @@ -1598,7 +1603,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { if (!isWinToJoinVRWithDstPhysReg(CopyMI, CopyMBB, DstInt, SrcInt)) { mri_->setRegAllocationHint(SrcInt.reg, 0, DstReg); ++numAborts; - DOUT << "\tMay tie down a physical register, abort!\n"; + DEBUG(errs() << "\tMay tie down a physical register, abort!\n"); Again = true; // May be possible to coalesce later. return false; } @@ -1612,9 +1617,6 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { unsigned JoinPReg = SrcIsPhys ? SrcReg : DstReg; const TargetRegisterClass *RC = mri_->getRegClass(JoinVReg); unsigned Threshold = allocatableRCRegs_[RC].count() * 2; - if (TheCopy.isBackEdge) - Threshold *= 2; // Favors back edge copies. - unsigned Length = li_->getApproximateInstructionCount(JoinVInt); float Ratio = 1.0 / Threshold; if (Length > Threshold && @@ -1622,7 +1624,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { mri_->use_end()) / Length) < Ratio)) { mri_->setRegAllocationHint(JoinVInt.reg, 0, JoinPReg); ++numAborts; - DOUT << "\tMay tie down a physical register, abort!\n"; + DEBUG(errs() << "\tMay tie down a physical register, abort!\n"); Again = true; // May be possible to coalesce later. 
return false; } @@ -1641,7 +1643,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { // Only coalesce an empty interval (defined by implicit_def) with // another interval which has a valno defined by the CopyMI and the CopyMI // is a kill of the implicit def. - DOUT << "Not profitable!\n"; + DEBUG(errs() << "Not profitable!\n"); return false; } @@ -1651,9 +1653,9 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { // If definition of source is defined by trivial computation, try // rematerializing it. if (!isExtSubReg && !isInsSubReg && !isSubRegToReg && - ReMaterializeTrivialDef(SrcInt, DstInt.reg, CopyMI)) + ReMaterializeTrivialDef(SrcInt, DstReg, DstSubIdx, CopyMI)) return true; - + // If we can eliminate the copy without merging the live ranges, do so now. if (!isExtSubReg && !isInsSubReg && !isSubRegToReg && (AdjustCopiesBackFrom(SrcInt, DstInt, CopyMI) || @@ -1661,9 +1663,9 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { JoinedCopies.insert(CopyMI); return true; } - + // Otherwise, we are unable to join the intervals. - DOUT << "Interference!\n"; + DEBUG(errs() << "Interference!\n"); Again = true; // May be possible to coalesce later. return false; } @@ -1676,7 +1678,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { } assert(TargetRegisterInfo::isVirtualRegister(SrcReg) && "LiveInterval::join didn't work right!"); - + // If we're about to merge live ranges into a physical register live interval, // we have to update any aliased register's live ranges to indicate that they // have clobbered values for this range. 
@@ -1690,14 +1692,14 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { for (LiveInterval::const_vni_iterator I = SavedLI->vni_begin(), E = SavedLI->vni_end(); I != E; ++I) { const VNInfo *ValNo = *I; - VNInfo *NewValNo = RealInt.getNextValue(ValNo->def, ValNo->copy, + VNInfo *NewValNo = RealInt.getNextValue(ValNo->def, ValNo->getCopy(), false, // updated at * li_->getVNInfoAllocator()); NewValNo->setFlags(ValNo->getFlags()); // * updated here. RealInt.addKills(NewValNo, ValNo->kills); RealInt.MergeValueInAsValue(*SavedLI, ValNo, NewValNo); } - RealInt.weight += SavedLI->weight; + RealInt.weight += SavedLI->weight; DstReg = RealDstReg ? RealDstReg : RealSrcReg; } @@ -1721,32 +1723,15 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { // Coalescing to a virtual register that is of a sub-register class of the // other. Make sure the resulting register is set to the right register class. - if (CrossRC) { - ++numCrossRCs; - if (NewRC) - mri_->setRegClass(DstReg, NewRC); - } - - if (NewHeuristic) { - // Add all copies that define val# in the source interval into the queue. - for (LiveInterval::const_vni_iterator i = ResSrcInt->vni_begin(), - e = ResSrcInt->vni_end(); i != e; ++i) { - const VNInfo *vni = *i; - // FIXME: Do isPHIDef and isDefAccurate both need to be tested? - if (!vni->def || vni->isUnused() || vni->isPHIDef() || !vni->isDefAccurate()) - continue; - MachineInstr *CopyMI = li_->getInstructionFromIndex(vni->def); - unsigned NewSrcReg, NewDstReg, NewSrcSubIdx, NewDstSubIdx; - if (CopyMI && - JoinedCopies.count(CopyMI) == 0 && - tii_->isMoveInstr(*CopyMI, NewSrcReg, NewDstReg, - NewSrcSubIdx, NewDstSubIdx)) { - unsigned LoopDepth = loopInfo->getLoopDepth(CopyMBB); - JoinQueue->push(CopyRec(CopyMI, LoopDepth, - isBackEdgeCopy(CopyMI, DstReg))); - } - } - } + if (CrossRC) + ++numCrossRCs; + + // This may happen even if it's cross-rc coalescing. e.g. 
+ // %reg1026<def> = SUBREG_TO_REG 0, %reg1037<kill>, 4 + // reg1026 -> GR64, reg1037 -> GR32_ABCD. The resulting register will have to + // be allocate a register from GR64_ABCD. + if (NewRC) + mri_->setRegClass(DstReg, NewRC); // Remember to delete the copy instruction. JoinedCopies.insert(CopyMI); @@ -1757,13 +1742,6 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { if (TargetRegisterInfo::isVirtualRegister(DstReg)) RemoveUnnecessaryKills(DstReg, *ResDstInt); - if (isInsSubReg) - // Avoid: - // r1024 = op - // r1024 = implicit_def - // ... - // = r1024 - RemoveDeadImpDef(DstReg, *ResDstInt); UpdateRegDefsUses(SrcReg, DstReg, SubIdx); // SrcReg is guarateed to be the register whose live interval that is @@ -1779,29 +1757,6 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { delete SavedLI; } - if (isEmpty) { - // Now the copy is being coalesced away, the val# previously defined - // by the copy is being defined by an IMPLICIT_DEF which defines a zero - // length interval. Remove the val#. - unsigned CopyIdx = li_->getDefIndex(li_->getInstructionIndex(CopyMI)); - const LiveRange *LR = ResDstInt->getLiveRangeContaining(CopyIdx); - VNInfo *ImpVal = LR->valno; - assert(ImpVal->def == CopyIdx); - unsigned NextDef = LR->end; - TurnCopiesFromValNoToImpDefs(*ResDstInt, ImpVal); - ResDstInt->removeValNo(ImpVal); - LR = ResDstInt->FindLiveRangeContaining(NextDef); - if (LR != ResDstInt->end() && LR->valno->def == NextDef) { - // Special case: vr1024 = implicit_def - // vr1024 = insert_subreg vr1024, vr1025, c - // The insert_subreg becomes a "copy" that defines a val# which can itself - // be coalesced away. - MachineInstr *DefMI = li_->getInstructionFromIndex(NextDef); - if (DefMI->getOpcode() == TargetInstrInfo::INSERT_SUBREG) - LR->valno->copy = DefMI; - } - } - // If resulting interval has a preference that no longer fits because of subreg // coalescing, just clear the preference. 
unsigned Preference = getRegAllocPreference(ResDstInt->reg, *mf_, mri_, tri_); @@ -1812,8 +1767,11 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { mri_->setRegAllocationHint(ResDstInt->reg, 0, 0); } - DOUT << "\n\t\tJoined. Result = "; ResDstInt->print(DOUT, tri_); - DOUT << "\n"; + DEBUG({ + errs() << "\n\t\tJoined. Result = "; + ResDstInt->print(errs(), tri_); + errs() << "\n"; + }); ++numJoins; return true; @@ -1860,7 +1818,7 @@ static unsigned ComputeUltimateVN(VNInfo *VNI, // been computed, return it. if (OtherValNoAssignments[OtherValNo->id] >= 0) return ThisValNoAssignments[VN] = OtherValNoAssignments[OtherValNo->id]; - + // Mark this value number as currently being computed, then ask what the // ultimate value # of the other value is. ThisValNoAssignments[VN] = -2; @@ -1896,7 +1854,7 @@ bool SimpleRegisterCoalescing::RangeIsDefinedByCopyFromReg(LiveInterval &li, DstReg == li.reg && SrcReg == Reg) { // Cache computed info. LR->valno->def = LR->start; - LR->valno->copy = DefMI; + LR->valno->setCopy(DefMI); return true; } } @@ -1910,16 +1868,16 @@ bool SimpleRegisterCoalescing::RangeIsDefinedByCopyFromReg(LiveInterval &li, /// joins them and returns true. bool SimpleRegisterCoalescing::SimpleJoin(LiveInterval &LHS, LiveInterval &RHS){ assert(RHS.containsOneValue()); - + // Some number (potentially more than one) value numbers in the current // interval may be defined as copies from the RHS. Scan the overlapping // portions of the LHS and RHS, keeping track of this and looking for // overlapping live ranges that are NOT defined as copies. If these exist, we // cannot coalesce. 
- + LiveInterval::iterator LHSIt = LHS.begin(), LHSEnd = LHS.end(); LiveInterval::iterator RHSIt = RHS.begin(), RHSEnd = RHS.end(); - + if (LHSIt->start < RHSIt->start) { LHSIt = std::upper_bound(LHSIt, LHSEnd, RHSIt->start); if (LHSIt != LHS.begin()) --LHSIt; @@ -1927,9 +1885,9 @@ bool SimpleRegisterCoalescing::SimpleJoin(LiveInterval &LHS, LiveInterval &RHS){ RHSIt = std::upper_bound(RHSIt, RHSEnd, LHSIt->start); if (RHSIt != RHS.begin()) --RHSIt; } - + SmallVector<VNInfo*, 8> EliminatedLHSVals; - + while (1) { // Determine if these live intervals overlap. bool Overlaps = false; @@ -1937,7 +1895,7 @@ bool SimpleRegisterCoalescing::SimpleJoin(LiveInterval &LHS, LiveInterval &RHS){ Overlaps = LHSIt->end > RHSIt->start; else Overlaps = RHSIt->end > LHSIt->start; - + // If the live intervals overlap, there are two interesting cases: if the // LHS interval is defined by a copy from the RHS, it's ok and we record // that the LHS value # is the same as the RHS. If it's not, then we cannot @@ -1955,7 +1913,7 @@ bool SimpleRegisterCoalescing::SimpleJoin(LiveInterval &LHS, LiveInterval &RHS){ // vr1025 = copy vr1024 // .. // BB2: - // vr1024 = op + // vr1024 = op // = vr1025 // Even though vr1025 is copied from vr1024, it's not safe to // coalesce them since the live range of vr1025 intersects the @@ -1964,12 +1922,12 @@ bool SimpleRegisterCoalescing::SimpleJoin(LiveInterval &LHS, LiveInterval &RHS){ return false; EliminatedLHSVals.push_back(LHSIt->valno); } - + // We know this entire LHS live range is okay, so skip it now. if (++LHSIt == LHSEnd) break; continue; } - + if (LHSIt->end < RHSIt->end) { if (++LHSIt == LHSEnd) break; } else { @@ -1993,7 +1951,7 @@ bool SimpleRegisterCoalescing::SimpleJoin(LiveInterval &LHS, LiveInterval &RHS){ // vr1025 = copy vr1024 // .. 
// BB2: - // vr1024 = op + // vr1024 = op // = vr1025 // Even though vr1025 is copied from vr1024, it's not safe to // coalesced them since live range of vr1025 intersects the @@ -2007,11 +1965,11 @@ bool SimpleRegisterCoalescing::SimpleJoin(LiveInterval &LHS, LiveInterval &RHS){ } } } - + if (++RHSIt == RHSEnd) break; } } - + // If we got here, we know that the coalescing will be successful and that // the value numbers in EliminatedLHSVals will all be merged together. Since // the most common case is that EliminatedLHSVals has a single number, we @@ -2039,28 +1997,29 @@ bool SimpleRegisterCoalescing::SimpleJoin(LiveInterval &LHS, LiveInterval &RHS){ *tri_->getSuperRegisters(LHS.reg)) // Imprecise sub-register information. Can't handle it. return false; - assert(0 && "No copies from the RHS?"); + llvm_unreachable("No copies from the RHS?"); } else { LHSValNo = EliminatedLHSVals[0]; } - + // Okay, now that there is a single LHS value number that we're merging the // RHS into, update the value number info for the LHS to indicate that the // value number is defined where the RHS value number was. const VNInfo *VNI = RHS.getValNumInfo(0); LHSValNo->def = VNI->def; - LHSValNo->copy = VNI->copy; - + LHSValNo->setCopy(VNI->getCopy()); + // Okay, the final step is to loop over the RHS live intervals, adding them to // the LHS. if (VNI->hasPHIKill()) LHSValNo->setHasPHIKill(true); LHS.addKills(LHSValNo, VNI->kills); LHS.MergeRangesInAsValue(RHS, LHSValNo); - LHS.weight += RHS.weight; + + LHS.ComputeJoinedWeight(RHS); // Update regalloc hint if both are virtual registers. 
- if (TargetRegisterInfo::isVirtualRegister(LHS.reg) && + if (TargetRegisterInfo::isVirtualRegister(LHS.reg) && TargetRegisterInfo::isVirtualRegister(RHS.reg)) { std::pair<unsigned, unsigned> RHSPref = mri_->getRegAllocationHint(RHS.reg); std::pair<unsigned, unsigned> LHSPref = mri_->getRegAllocationHint(LHS.reg); @@ -2122,8 +2081,10 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS, } else { for (const unsigned* SR = tri_->getSubRegisters(LHS.reg); *SR; ++SR) if (li_->hasInterval(*SR) && RHS.overlaps(li_->getInterval(*SR))) { - DOUT << "Interfere with sub-register "; - DEBUG(li_->getInterval(*SR).print(DOUT, tri_)); + DEBUG({ + errs() << "Interfere with sub-register "; + li_->getInterval(*SR).print(errs(), tri_); + }); return false; } } @@ -2137,19 +2098,21 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS, } else { for (const unsigned* SR = tri_->getSubRegisters(RHS.reg); *SR; ++SR) if (li_->hasInterval(*SR) && LHS.overlaps(li_->getInterval(*SR))) { - DOUT << "Interfere with sub-register "; - DEBUG(li_->getInterval(*SR).print(DOUT, tri_)); + DEBUG({ + errs() << "Interfere with sub-register "; + li_->getInterval(*SR).print(errs(), tri_); + }); return false; } } } - + // Compute ultimate value numbers for the LHS and RHS values. if (RHS.containsOneValue()) { // Copies from a liveinterval with a single value are simple to handle and // very common, handle the special case here. This is important, because // often RHS is small and LHS is large (e.g. a physreg). - + // Find out if the RHS is defined as a copy from some value in the LHS. int RHSVal0DefinedFromLHS = -1; int RHSValID = -1; @@ -2167,15 +2130,16 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS, } } else { // It was defined as a copy from the LHS, find out what value # it is. 
- RHSValNoInfo = LHS.getLiveRangeContaining(RHSValNoInfo0->def-1)->valno; + RHSValNoInfo = + LHS.getLiveRangeContaining(li_->getPrevSlot(RHSValNoInfo0->def))->valno; RHSValID = RHSValNoInfo->id; RHSVal0DefinedFromLHS = RHSValID; } - + LHSValNoAssignments.resize(LHS.getNumValNums(), -1); RHSValNoAssignments.resize(RHS.getNumValNums(), -1); NewVNInfo.resize(LHS.getNumValNums(), NULL); - + // Okay, *all* of the values in LHS that are defined as a copy from RHS // should now get updated. for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end(); @@ -2207,7 +2171,7 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS, LHSValNoAssignments[VN] = VN; } } - + assert(RHSValID != -1 && "Didn't find value #?"); RHSValNoAssignments[0] = RHSValID; if (RHSVal0DefinedFromLHS != -1) { @@ -2221,44 +2185,46 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS, for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end(); i != e; ++i) { VNInfo *VNI = *i; - if (VNI->isUnused() || VNI->copy == 0) // Src not defined by a copy? + if (VNI->isUnused() || VNI->getCopy() == 0) // Src not defined by a copy? continue; - + // DstReg is known to be a register in the LHS interval. If the src is // from the RHS interval, we can use its value #. if (li_->getVNInfoSourceReg(VNI) != RHS.reg) continue; - + // Figure out the value # from the RHS. - LHSValsDefinedFromRHS[VNI]=RHS.getLiveRangeContaining(VNI->def-1)->valno; + LHSValsDefinedFromRHS[VNI]= + RHS.getLiveRangeContaining(li_->getPrevSlot(VNI->def))->valno; } - + // Loop over the value numbers of the RHS, seeing if any are defined from // the LHS. for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end(); i != e; ++i) { VNInfo *VNI = *i; - if (VNI->isUnused() || VNI->copy == 0) // Src not defined by a copy? + if (VNI->isUnused() || VNI->getCopy() == 0) // Src not defined by a copy? continue; - + // DstReg is known to be a register in the RHS interval. 
If the src is // from the LHS interval, we can use its value #. if (li_->getVNInfoSourceReg(VNI) != LHS.reg) continue; - + // Figure out the value # from the LHS. - RHSValsDefinedFromLHS[VNI]=LHS.getLiveRangeContaining(VNI->def-1)->valno; + RHSValsDefinedFromLHS[VNI]= + LHS.getLiveRangeContaining(li_->getPrevSlot(VNI->def))->valno; } - + LHSValNoAssignments.resize(LHS.getNumValNums(), -1); RHSValNoAssignments.resize(RHS.getNumValNums(), -1); NewVNInfo.reserve(LHS.getNumValNums() + RHS.getNumValNums()); - + for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end(); i != e; ++i) { VNInfo *VNI = *i; unsigned VN = VNI->id; - if (LHSValNoAssignments[VN] >= 0 || VNI->isUnused()) + if (LHSValNoAssignments[VN] >= 0 || VNI->isUnused()) continue; ComputeUltimateVN(VNI, NewVNInfo, LHSValsDefinedFromRHS, RHSValsDefinedFromLHS, @@ -2276,20 +2242,20 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS, RHSValNoAssignments[VN] = NewVNInfo.size()-1; continue; } - + ComputeUltimateVN(VNI, NewVNInfo, RHSValsDefinedFromLHS, LHSValsDefinedFromRHS, RHSValNoAssignments, LHSValNoAssignments); } } - + // Armed with the mappings of LHS/RHS values to ultimate values, walk the // interval lists to see if these intervals are coalescable. LiveInterval::const_iterator I = LHS.begin(); LiveInterval::const_iterator IE = LHS.end(); LiveInterval::const_iterator J = RHS.begin(); LiveInterval::const_iterator JE = RHS.end(); - + // Skip ahead until the first place of potential sharing. if (I->start < J->start) { I = std::upper_bound(I, IE, J->start); @@ -2298,7 +2264,7 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS, J = std::upper_bound(J, JE, I->start); if (J != RHS.begin()) --J; } - + while (1) { // Determine if these two live ranges overlap. 
bool Overlaps; @@ -2316,7 +2282,7 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS, RHSValNoAssignments[J->valno->id]) return false; } - + if (I->end < J->end) { ++I; if (I == IE) break; @@ -2331,7 +2297,7 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS, E = LHSValsDefinedFromRHS.end(); I != E; ++I) { VNInfo *VNI = I->first; unsigned LHSValID = LHSValNoAssignments[VNI->id]; - LiveInterval::removeKill(NewVNInfo[LHSValID], VNI->def); + NewVNInfo[LHSValID]->removeKill(VNI->def); if (VNI->hasPHIKill()) NewVNInfo[LHSValID]->setHasPHIKill(true); RHS.addKills(NewVNInfo[LHSValID], VNI->kills); @@ -2342,7 +2308,7 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS, E = RHSValsDefinedFromLHS.end(); I != E; ++I) { VNInfo *VNI = I->first; unsigned RHSValID = RHSValNoAssignments[VNI->id]; - LiveInterval::removeKill(NewVNInfo[RHSValID], VNI->def); + NewVNInfo[RHSValID]->removeKill(VNI->def); if (VNI->hasPHIKill()) NewVNInfo[RHSValID]->setHasPHIKill(true); LHS.addKills(NewVNInfo[RHSValID], VNI->kills); @@ -2377,37 +2343,17 @@ namespace { }; } -/// getRepIntervalSize - Returns the size of the interval that represents the -/// specified register. -template<class SF> -unsigned JoinPriorityQueue<SF>::getRepIntervalSize(unsigned Reg) { - return Rc->getRepIntervalSize(Reg); -} - -/// CopyRecSort::operator - Join priority queue sorting function. -/// -bool CopyRecSort::operator()(CopyRec left, CopyRec right) const { - // Inner loops first. 
- if (left.LoopDepth > right.LoopDepth) - return false; - else if (left.LoopDepth == right.LoopDepth) - if (left.isBackEdge && !right.isBackEdge) - return false; - return true; -} - void SimpleRegisterCoalescing::CopyCoalesceInMBB(MachineBasicBlock *MBB, std::vector<CopyRec> &TryAgain) { - DOUT << ((Value*)MBB->getBasicBlock())->getName() << ":\n"; + DEBUG(errs() << ((Value*)MBB->getBasicBlock())->getName() << ":\n"); std::vector<CopyRec> VirtCopies; std::vector<CopyRec> PhysCopies; std::vector<CopyRec> ImpDefCopies; - unsigned LoopDepth = loopInfo->getLoopDepth(MBB); for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end(); MII != E;) { MachineInstr *Inst = MII++; - + // If this isn't a copy nor a extract_subreg, we can't join intervals. unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; if (Inst->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG) { @@ -2422,21 +2368,14 @@ void SimpleRegisterCoalescing::CopyCoalesceInMBB(MachineBasicBlock *MBB, bool SrcIsPhys = TargetRegisterInfo::isPhysicalRegister(SrcReg); bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg); - if (NewHeuristic) { - JoinQueue->push(CopyRec(Inst, LoopDepth, isBackEdgeCopy(Inst, DstReg))); - } else { - if (li_->hasInterval(SrcReg) && li_->getInterval(SrcReg).empty()) - ImpDefCopies.push_back(CopyRec(Inst, 0, false)); - else if (SrcIsPhys || DstIsPhys) - PhysCopies.push_back(CopyRec(Inst, 0, false)); - else - VirtCopies.push_back(CopyRec(Inst, 0, false)); - } + if (li_->hasInterval(SrcReg) && li_->getInterval(SrcReg).empty()) + ImpDefCopies.push_back(CopyRec(Inst, 0)); + else if (SrcIsPhys || DstIsPhys) + PhysCopies.push_back(CopyRec(Inst, 0)); + else + VirtCopies.push_back(CopyRec(Inst, 0)); } - if (NewHeuristic) - return; - // Try coalescing implicit copies first, followed by copies to / from // physical registers, then finally copies from virtual registers to // virtual registers. 
@@ -2464,10 +2403,7 @@ void SimpleRegisterCoalescing::CopyCoalesceInMBB(MachineBasicBlock *MBB, } void SimpleRegisterCoalescing::joinIntervals() { - DOUT << "********** JOINING INTERVALS ***********\n"; - - if (NewHeuristic) - JoinQueue = new JoinPriorityQueue<CopyRecSort>(this); + DEBUG(errs() << "********** JOINING INTERVALS ***********\n"); std::vector<CopyRec> TryAgainList; if (loopInfo->empty()) { @@ -2495,52 +2431,26 @@ void SimpleRegisterCoalescing::joinIntervals() { for (unsigned i = 0, e = MBBs.size(); i != e; ++i) CopyCoalesceInMBB(MBBs[i].second, TryAgainList); } - + // Joining intervals can allow other intervals to be joined. Iteratively join // until we make no progress. - if (NewHeuristic) { - SmallVector<CopyRec, 16> TryAgain; - bool ProgressMade = true; - while (ProgressMade) { - ProgressMade = false; - while (!JoinQueue->empty()) { - CopyRec R = JoinQueue->pop(); - bool Again = false; - bool Success = JoinCopy(R, Again); - if (Success) - ProgressMade = true; - else if (Again) - TryAgain.push_back(R); - } + bool ProgressMade = true; + while (ProgressMade) { + ProgressMade = false; - if (ProgressMade) { - while (!TryAgain.empty()) { - JoinQueue->push(TryAgain.back()); - TryAgain.pop_back(); - } - } - } - } else { - bool ProgressMade = true; - while (ProgressMade) { - ProgressMade = false; - - for (unsigned i = 0, e = TryAgainList.size(); i != e; ++i) { - CopyRec &TheCopy = TryAgainList[i]; - if (TheCopy.MI) { - bool Again = false; - bool Success = JoinCopy(TheCopy, Again); - if (Success || !Again) { - TheCopy.MI = 0; // Mark this one as done. - ProgressMade = true; - } - } + for (unsigned i = 0, e = TryAgainList.size(); i != e; ++i) { + CopyRec &TheCopy = TryAgainList[i]; + if (!TheCopy.MI) + continue; + + bool Again = false; + bool Success = JoinCopy(TheCopy, Again); + if (Success || !Again) { + TheCopy.MI = 0; // Mark this one as done. 
+ ProgressMade = true; } } } - - if (NewHeuristic) - delete JoinQueue; } /// Return true if the two specified registers belong to different register @@ -2567,9 +2477,11 @@ SimpleRegisterCoalescing::differingRegisterClasses(unsigned RegA, /// lastRegisterUse - Returns the last use of the specific register between /// cycles Start and End or NULL if there are no uses. MachineOperand * -SimpleRegisterCoalescing::lastRegisterUse(unsigned Start, unsigned End, - unsigned Reg, unsigned &UseIdx) const{ - UseIdx = 0; +SimpleRegisterCoalescing::lastRegisterUse(LiveIndex Start, + LiveIndex End, + unsigned Reg, + LiveIndex &UseIdx) const{ + UseIdx = LiveIndex(); if (TargetRegisterInfo::isVirtualRegister(Reg)) { MachineOperand *LastUse = NULL; for (MachineRegisterInfo::use_iterator I = mri_->use_begin(Reg), @@ -2581,7 +2493,7 @@ SimpleRegisterCoalescing::lastRegisterUse(unsigned Start, unsigned End, SrcReg == DstReg) // Ignore identity copies. continue; - unsigned Idx = li_->getInstructionIndex(UseMI); + LiveIndex Idx = li_->getInstructionIndex(UseMI); if (Idx >= Start && Idx < End && Idx >= UseIdx) { LastUse = &Use; UseIdx = li_->getUseIndex(Idx); @@ -2590,13 +2502,13 @@ SimpleRegisterCoalescing::lastRegisterUse(unsigned Start, unsigned End, return LastUse; } - int e = (End-1) / InstrSlots::NUM * InstrSlots::NUM; - int s = Start; + LiveIndex s = Start; + LiveIndex e = li_->getBaseIndex(li_->getPrevSlot(End)); while (e >= s) { // Skip deleted instructions MachineInstr *MI = li_->getInstructionFromIndex(e); - while ((e - InstrSlots::NUM) >= s && !MI) { - e -= InstrSlots::NUM; + while (e != LiveIndex() && li_->getPrevIndex(e) >= s && !MI) { + e = li_->getPrevIndex(e); MI = li_->getInstructionFromIndex(e); } if (e < s || MI == NULL) @@ -2615,7 +2527,7 @@ SimpleRegisterCoalescing::lastRegisterUse(unsigned Start, unsigned End, } } - e -= InstrSlots::NUM; + e = li_->getPrevIndex(e); } return NULL; @@ -2624,9 +2536,9 @@ SimpleRegisterCoalescing::lastRegisterUse(unsigned Start, 
unsigned End, void SimpleRegisterCoalescing::printRegName(unsigned reg) const { if (TargetRegisterInfo::isPhysicalRegister(reg)) - cerr << tri_->getName(reg); + errs() << tri_->getName(reg); else - cerr << "%reg" << reg; + errs() << "%reg" << reg; } void SimpleRegisterCoalescing::releaseMemory() { @@ -2635,64 +2547,106 @@ void SimpleRegisterCoalescing::releaseMemory() { ReMatDefs.clear(); } -static bool isZeroLengthInterval(LiveInterval *li) { +/// Returns true if the given live interval is zero length. +static bool isZeroLengthInterval(LiveInterval *li, LiveIntervals *li_) { for (LiveInterval::Ranges::const_iterator i = li->ranges.begin(), e = li->ranges.end(); i != e; ++i) - if (i->end - i->start > LiveInterval::InstrSlots::NUM) + if (li_->getPrevIndex(i->end) > i->start) return false; return true; } -/// TurnCopyIntoImpDef - If source of the specified copy is an implicit def, -/// turn the copy into an implicit def. -bool -SimpleRegisterCoalescing::TurnCopyIntoImpDef(MachineBasicBlock::iterator &I, - MachineBasicBlock *MBB, - unsigned DstReg, unsigned SrcReg) { - MachineInstr *CopyMI = &*I; - unsigned CopyIdx = li_->getDefIndex(li_->getInstructionIndex(CopyMI)); - if (!li_->hasInterval(SrcReg)) - return false; - LiveInterval &SrcInt = li_->getInterval(SrcReg); - if (!SrcInt.empty()) - return false; - if (!li_->hasInterval(DstReg)) - return false; - LiveInterval &DstInt = li_->getInterval(DstReg); - const LiveRange *DstLR = DstInt.getLiveRangeContaining(CopyIdx); - // If the valno extends beyond this basic block, then it's not safe to delete - // the val# or else livein information won't be correct. 
- MachineBasicBlock *EndMBB = li_->getMBBFromIndex(DstLR->end); - if (EndMBB != MBB) - return false; - DstInt.removeValNo(DstLR->valno); - CopyMI->setDesc(tii_->get(TargetInstrInfo::IMPLICIT_DEF)); - for (int i = CopyMI->getNumOperands() - 1, e = 0; i > e; --i) - CopyMI->RemoveOperand(i); - CopyMI->getOperand(0).setIsUndef(); - bool NoUse = mri_->use_empty(SrcReg); - if (NoUse) { - for (MachineRegisterInfo::reg_iterator RI = mri_->reg_begin(SrcReg), - RE = mri_->reg_end(); RI != RE; ) { - assert(RI.getOperand().isDef()); - MachineInstr *DefMI = &*RI; - ++RI; - // The implicit_def source has no other uses, delete it. - assert(DefMI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF); - li_->RemoveMachineInstrFromMaps(DefMI); - DefMI->eraseFromParent(); +void SimpleRegisterCoalescing::CalculateSpillWeights() { + SmallSet<unsigned, 4> Processed; + for (MachineFunction::iterator mbbi = mf_->begin(), mbbe = mf_->end(); + mbbi != mbbe; ++mbbi) { + MachineBasicBlock* MBB = mbbi; + LiveIndex MBBEnd = li_->getMBBEndIdx(MBB); + MachineLoop* loop = loopInfo->getLoopFor(MBB); + unsigned loopDepth = loop ? loop->getLoopDepth() : 0; + bool isExit = loop ? loop->isLoopExit(MBB) : false; + + for (MachineBasicBlock::iterator mii = MBB->begin(), mie = MBB->end(); + mii != mie; ++mii) { + MachineInstr *MI = mii; + + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &mopi = MI->getOperand(i); + if (!mopi.isReg() || mopi.getReg() == 0) + continue; + unsigned Reg = mopi.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(mopi.getReg())) + continue; + // Multiple uses of reg by the same instruction. It should not + // contribute to spill weight again. 
+ if (!Processed.insert(Reg)) + continue; + + bool HasDef = mopi.isDef(); + bool HasUse = !HasDef; + for (unsigned j = i+1; j != e; ++j) { + const MachineOperand &mopj = MI->getOperand(j); + if (!mopj.isReg() || mopj.getReg() != Reg) + continue; + HasDef |= mopj.isDef(); + HasUse |= mopj.isUse(); + if (HasDef && HasUse) + break; + } + + LiveInterval &RegInt = li_->getInterval(Reg); + float Weight = li_->getSpillWeight(HasDef, HasUse, loopDepth); + if (HasDef && isExit) { + // Looks like this is a loop count variable update. + LiveIndex DefIdx = + li_->getDefIndex(li_->getInstructionIndex(MI)); + const LiveRange *DLR = + li_->getInterval(Reg).getLiveRangeContaining(DefIdx); + if (DLR->end > MBBEnd) + Weight *= 3.0F; + } + RegInt.weight += Weight; + } + Processed.clear(); } } - // Mark uses of implicit_def isUndef. - for (MachineRegisterInfo::use_iterator RI = mri_->use_begin(DstReg), - RE = mri_->use_end(); RI != RE; ++RI) { - assert((*RI).getParent() == MBB); - RI.getOperand().setIsUndef(); - } + for (LiveIntervals::iterator I = li_->begin(), E = li_->end(); I != E; ++I) { + LiveInterval &LI = *I->second; + if (TargetRegisterInfo::isVirtualRegister(LI.reg)) { + // If the live interval length is essentially zero, i.e. in every live + // range the use follows def immediately, it doesn't make sense to spill + // it and hope it will be easier to allocate for this li. + if (isZeroLengthInterval(&LI, li_)) { + LI.weight = HUGE_VALF; + continue; + } - ++I; - return true; + bool isLoad = false; + SmallVector<LiveInterval*, 4> SpillIs; + if (li_->isReMaterializable(LI, SpillIs, isLoad)) { + // If all of the definitions of the interval are re-materializable, + // it is a preferred candidate for spilling. If non of the defs are + // loads, then it's potentially very cheap to re-materialize. + // FIXME: this gets much more complicated once we support non-trivial + // re-materialization. 
+ if (isLoad) + LI.weight *= 0.9F; + else + LI.weight *= 0.5F; + } + + // Slightly prefer live interval that has been assigned a preferred reg. + std::pair<unsigned, unsigned> Hint = mri_->getRegAllocationHint(LI.reg); + if (Hint.first || Hint.second) + LI.weight *= 1.01F; + + // Divide the weight of the interval by its size. This encourages + // spilling of intervals that are large and have few uses, and + // discourages spilling of small intervals with many uses. + LI.weight /= li_->getApproximateInstructionCount(LI) * InstrSlots::NUM; + } + } } @@ -2703,11 +2657,12 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) { tri_ = tm_->getRegisterInfo(); tii_ = tm_->getInstrInfo(); li_ = &getAnalysis<LiveIntervals>(); + AA = &getAnalysis<AliasAnalysis>(); loopInfo = &getAnalysis<MachineLoopInfo>(); - DOUT << "********** SIMPLE REGISTER COALESCING **********\n" - << "********** Function: " - << ((Value*)mf_->getFunction())->getName() << '\n'; + DEBUG(errs() << "********** SIMPLE REGISTER COALESCING **********\n" + << "********** Function: " + << ((Value*)mf_->getFunction())->getName() << '\n'); allocatableRegs_ = tri_->getAllocatableSet(fn); for (TargetRegisterInfo::regclass_iterator I = tri_->regclass_begin(), @@ -2719,10 +2674,10 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) { if (EnableJoining) { joinIntervals(); DEBUG({ - DOUT << "********** INTERVALS POST JOINING **********\n"; + errs() << "********** INTERVALS POST JOINING **********\n"; for (LiveIntervals::iterator I = li_->begin(), E = li_->end(); I != E; ++I){ - I->second->print(DOUT, tri_); - DOUT << "\n"; + I->second->print(errs(), tri_); + errs() << "\n"; } }); } @@ -2733,29 +2688,40 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) { for (MachineFunction::iterator mbbi = mf_->begin(), mbbe = mf_->end(); mbbi != mbbe; ++mbbi) { MachineBasicBlock* mbb = mbbi; - unsigned loopDepth = loopInfo->getLoopDepth(mbb); - for 
(MachineBasicBlock::iterator mii = mbb->begin(), mie = mbb->end(); mii != mie; ) { MachineInstr *MI = mii; unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; if (JoinedCopies.count(MI)) { // Delete all coalesced copies. + bool DoDelete = true; if (!tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) { assert((MI->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG || MI->getOpcode() == TargetInstrInfo::INSERT_SUBREG || MI->getOpcode() == TargetInstrInfo::SUBREG_TO_REG) && "Unrecognized copy instruction"); DstReg = MI->getOperand(0).getReg(); + if (TargetRegisterInfo::isPhysicalRegister(DstReg)) + // Do not delete extract_subreg, insert_subreg of physical + // registers unless the definition is dead. e.g. + // %DO<def> = INSERT_SUBREG %D0<undef>, %S0<kill>, 1 + // or else the scavenger may complain. LowerSubregs will + // change this to an IMPLICIT_DEF later. + DoDelete = false; } if (MI->registerDefIsDead(DstReg)) { LiveInterval &li = li_->getInterval(DstReg); if (!ShortenDeadCopySrcLiveRange(li, MI)) ShortenDeadCopyLiveRange(li, MI); + DoDelete = true; + } + if (!DoDelete) + mii = next(mii); + else { + li_->RemoveMachineInstrFromMaps(MI); + mii = mbbi->erase(mii); + ++numPeep; } - li_->RemoveMachineInstrFromMaps(MI); - mii = mbbi->erase(mii); - ++numPeep; continue; } @@ -2807,70 +2773,20 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) { li_->RemoveMachineInstrFromMaps(MI); mii = mbbi->erase(mii); ++numPeep; - } else if (!isMove || !TurnCopyIntoImpDef(mii, mbb, DstReg, SrcReg)) { - SmallSet<unsigned, 4> UniqueUses; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &mop = MI->getOperand(i); - if (mop.isReg() && mop.getReg() && - TargetRegisterInfo::isVirtualRegister(mop.getReg())) { - unsigned reg = mop.getReg(); - // Multiple uses of reg by the same instruction. It should not - // contribute to spill weight again. 
- if (UniqueUses.count(reg) != 0) - continue; - LiveInterval &RegInt = li_->getInterval(reg); - RegInt.weight += - li_->getSpillWeight(mop.isDef(), mop.isUse(), loopDepth); - UniqueUses.insert(reg); - } - } + } else { ++mii; } } } - for (LiveIntervals::iterator I = li_->begin(), E = li_->end(); I != E; ++I) { - LiveInterval &LI = *I->second; - if (TargetRegisterInfo::isVirtualRegister(LI.reg)) { - // If the live interval length is essentially zero, i.e. in every live - // range the use follows def immediately, it doesn't make sense to spill - // it and hope it will be easier to allocate for this li. - if (isZeroLengthInterval(&LI)) - LI.weight = HUGE_VALF; - else { - bool isLoad = false; - SmallVector<LiveInterval*, 4> SpillIs; - if (li_->isReMaterializable(LI, SpillIs, isLoad)) { - // If all of the definitions of the interval are re-materializable, - // it is a preferred candidate for spilling. If non of the defs are - // loads, then it's potentially very cheap to re-materialize. - // FIXME: this gets much more complicated once we support non-trivial - // re-materialization. - if (isLoad) - LI.weight *= 0.9F; - else - LI.weight *= 0.5F; - } - } - - // Slightly prefer live interval that has been assigned a preferred reg. - std::pair<unsigned, unsigned> Hint = mri_->getRegAllocationHint(LI.reg); - if (Hint.first || Hint.second) - LI.weight *= 1.01F; - - // Divide the weight of the interval by its size. This encourages - // spilling of intervals that are large and have few uses, and - // discourages spilling of small intervals with many uses. - LI.weight /= li_->getApproximateInstructionCount(LI) * InstrSlots::NUM; - } - } + CalculateSpillWeights(); DEBUG(dump()); return true; } /// print - Implement the dump method. 
-void SimpleRegisterCoalescing::print(std::ostream &O, const Module* m) const { +void SimpleRegisterCoalescing::print(raw_ostream &O, const Module* m) const { li_->print(O, m); } diff --git a/lib/CodeGen/SimpleRegisterCoalescing.h b/lib/CodeGen/SimpleRegisterCoalescing.h index d2c5581..3ebe3a1 100644 --- a/lib/CodeGen/SimpleRegisterCoalescing.h +++ b/lib/CodeGen/SimpleRegisterCoalescing.h @@ -18,7 +18,6 @@ #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/RegisterCoalescer.h" #include "llvm/ADT/BitVector.h" -#include <queue> namespace llvm { class SimpleRegisterCoalescing; @@ -33,44 +32,8 @@ namespace llvm { struct CopyRec { MachineInstr *MI; unsigned LoopDepth; - bool isBackEdge; - CopyRec(MachineInstr *mi, unsigned depth, bool be) - : MI(mi), LoopDepth(depth), isBackEdge(be) {}; - }; - - template<class SF> class JoinPriorityQueue; - - /// CopyRecSort - Sorting function for coalescer queue. - /// - struct CopyRecSort : public std::binary_function<CopyRec,CopyRec,bool> { - JoinPriorityQueue<CopyRecSort> *JPQ; - explicit CopyRecSort(JoinPriorityQueue<CopyRecSort> *jpq) : JPQ(jpq) {} - CopyRecSort(const CopyRecSort &RHS) : JPQ(RHS.JPQ) {} - bool operator()(CopyRec left, CopyRec right) const; - }; - - /// JoinQueue - A priority queue of copy instructions the coalescer is - /// going to process. - template<class SF> - class JoinPriorityQueue { - SimpleRegisterCoalescing *Rc; - std::priority_queue<CopyRec, std::vector<CopyRec>, SF> Queue; - - public: - explicit JoinPriorityQueue(SimpleRegisterCoalescing *rc) - : Rc(rc), Queue(SF(this)) {} - - bool empty() const { return Queue.empty(); } - void push(CopyRec R) { Queue.push(R); } - CopyRec pop() { - if (empty()) return CopyRec(0, 0, false); - CopyRec R = Queue.top(); - Queue.pop(); - return R; - } - - // Callbacks to SimpleRegisterCoalescing. 
- unsigned getRepIntervalSize(unsigned Reg); + CopyRec(MachineInstr *mi, unsigned depth) + : MI(mi), LoopDepth(depth) {}; }; class SimpleRegisterCoalescing : public MachineFunctionPass, @@ -82,14 +45,11 @@ namespace llvm { const TargetInstrInfo* tii_; LiveIntervals *li_; const MachineLoopInfo* loopInfo; + AliasAnalysis *AA; BitVector allocatableRegs_; DenseMap<const TargetRegisterClass*, BitVector> allocatableRCRegs_; - /// JoinQueue - A priority queue of copy instructions the coalescer is - /// going to process. - JoinPriorityQueue<CopyRecSort> *JoinQueue; - /// JoinedCopies - Keep track of copies eliminated due to coalescing. /// SmallPtrSet<MachineInstr*, 32> JoinedCopies; @@ -127,20 +87,8 @@ namespace llvm { return false; }; - /// getRepIntervalSize - Called from join priority queue sorting function. - /// It returns the size of the interval that represent the given register. - unsigned getRepIntervalSize(unsigned Reg) { - if (!li_->hasInterval(Reg)) - return 0; - return li_->getApproximateInstructionCount(li_->getInterval(Reg)) * - LiveInterval::InstrSlots::NUM; - } - /// print - Implement the dump method. - virtual void print(std::ostream &O, const Module* = 0) const; - void print(std::ostream *O, const Module* M = 0) const { - if (O) print(*O, M); - } + virtual void print(raw_ostream &O, const Module* = 0) const; private: /// joinIntervals - join compatible live intervals @@ -176,7 +124,6 @@ namespace llvm { /// classes. The registers may be either phys or virt regs. bool differingRegisterClasses(unsigned RegA, unsigned RegB) const; - /// AdjustCopiesBackFrom - We found a non-trivially-coalescable copy. If /// the source value number is defined by a copy from the destination reg /// see if we can merge these two destination reg valno# into a single @@ -199,20 +146,14 @@ namespace llvm { /// TrimLiveIntervalToLastUse - If there is a last use in the same basic /// block as the copy instruction, trim the ive interval to the last use /// and return true. 
- bool TrimLiveIntervalToLastUse(unsigned CopyIdx, + bool TrimLiveIntervalToLastUse(LiveIndex CopyIdx, MachineBasicBlock *CopyMBB, LiveInterval &li, const LiveRange *LR); /// ReMaterializeTrivialDef - If the source of a copy is defined by a trivial /// computation, replace the copy by rematerialize the definition. bool ReMaterializeTrivialDef(LiveInterval &SrcInt, unsigned DstReg, - MachineInstr *CopyMI); - - /// TurnCopyIntoImpDef - If source of the specified copy is an implicit def, - /// turn the copy into an implicit def. - bool TurnCopyIntoImpDef(MachineBasicBlock::iterator &I, - MachineBasicBlock *MBB, - unsigned DstReg, unsigned SrcReg); + unsigned DstSubIdx, MachineInstr *CopyMI); /// CanCoalesceWithImpDef - Returns true if the specified copy instruction /// from an implicit def to another register can be coalesced away. @@ -266,10 +207,6 @@ namespace llvm { bool RangeIsDefinedByCopyFromReg(LiveInterval &li, LiveRange *LR, unsigned Reg); - /// isBackEdgeCopy - Return true if CopyMI is a back edge copy. - /// - bool isBackEdgeCopy(MachineInstr *CopyMI, unsigned DstReg) const; - /// UpdateRegDefsUses - Replace all defs and uses of SrcReg to DstReg and /// update the subregister number if it is not zero. If DstReg is a /// physical register and the existing subregister number of the def / use @@ -277,10 +214,6 @@ namespace llvm { /// subregister. void UpdateRegDefsUses(unsigned SrcReg, unsigned DstReg, unsigned SubIdx); - /// RemoveDeadImpDef - Remove implicit_def instructions which are - /// "re-defining" registers due to insert_subreg coalescing. e.g. - void RemoveDeadImpDef(unsigned Reg, LiveInterval &LI); - /// RemoveUnnecessaryKills - Remove kill markers that are no longer accurate /// due to live range lengthening as the result of coalescing. 
void RemoveUnnecessaryKills(unsigned Reg, LiveInterval &LI); @@ -302,8 +235,13 @@ namespace llvm { /// lastRegisterUse - Returns the last use of the specific register between /// cycles Start and End or NULL if there are no uses. - MachineOperand *lastRegisterUse(unsigned Start, unsigned End, unsigned Reg, - unsigned &LastUseIdx) const; + MachineOperand *lastRegisterUse(LiveIndex Start, + LiveIndex End, unsigned Reg, + LiveIndex &LastUseIdx) const; + + /// CalculateSpillWeights - Compute spill weights for all virtual register + /// live intervals. + void CalculateSpillWeights(); void printRegName(unsigned reg) const; }; diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp new file mode 100644 index 0000000..e987fa2 --- /dev/null +++ b/lib/CodeGen/SjLjEHPrepare.cpp @@ -0,0 +1,520 @@ +//===- SjLjEHPass.cpp - Eliminate Invoke & Unwind instructions -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This transformation is designed for use by code generators which use SjLj +// based exception handling. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "sjljehprepare" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Instructions.h" +#include "llvm/Intrinsics.h" +#include "llvm/LLVMContext.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetLowering.h" +using namespace llvm; + +STATISTIC(NumInvokes, "Number of invokes replaced"); +STATISTIC(NumUnwinds, "Number of unwinds replaced"); +STATISTIC(NumSpilled, "Number of registers live across unwind edges"); + +namespace { + class VISIBILITY_HIDDEN SjLjEHPass : public FunctionPass { + + const TargetLowering *TLI; + + const Type *FunctionContextTy; + Constant *RegisterFn; + Constant *UnregisterFn; + Constant *ResumeFn; + Constant *BuiltinSetjmpFn; + Constant *FrameAddrFn; + Constant *LSDAAddrFn; + Value *PersonalityFn; + Constant *SelectorFn; + Constant *ExceptionFn; + + Value *CallSite; + public: + static char ID; // Pass identification, replacement for typeid + explicit SjLjEHPass(const TargetLowering *tli = NULL) + : FunctionPass(&ID), TLI(tli) { } + bool doInitialization(Module &M); + bool runOnFunction(Function &F); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { } + const char *getPassName() const { + return "SJLJ Exception Handling preparation"; + } + + private: + void markInvokeCallSite(InvokeInst *II, unsigned InvokeNo, + Value *CallSite, + SwitchInst *CatchSwitch); + void splitLiveRangesLiveAcrossInvokes(SmallVector<InvokeInst*,16> &Invokes); + bool insertSjLjEHSupport(Function &F); + }; +} // end anonymous 
namespace + +char SjLjEHPass::ID = 0; + +// Public Interface To the SjLjEHPass pass. +FunctionPass *llvm::createSjLjEHPass(const TargetLowering *TLI) { + return new SjLjEHPass(TLI); +} +// doInitialization - Set up decalarations and types needed to process +// exceptions. +bool SjLjEHPass::doInitialization(Module &M) { + // Build the function context structure. + // builtin_setjmp uses a five word jbuf + const Type *VoidPtrTy = + Type::getInt8PtrTy(M.getContext()); + const Type *Int32Ty = Type::getInt32Ty(M.getContext()); + FunctionContextTy = + StructType::get(M.getContext(), + VoidPtrTy, // __prev + Int32Ty, // call_site + ArrayType::get(Int32Ty, 4), // __data + VoidPtrTy, // __personality + VoidPtrTy, // __lsda + ArrayType::get(VoidPtrTy, 5), // __jbuf + NULL); + RegisterFn = M.getOrInsertFunction("_Unwind_SjLj_Register", + Type::getVoidTy(M.getContext()), + PointerType::getUnqual(FunctionContextTy), + (Type *)0); + UnregisterFn = + M.getOrInsertFunction("_Unwind_SjLj_Unregister", + Type::getVoidTy(M.getContext()), + PointerType::getUnqual(FunctionContextTy), + (Type *)0); + ResumeFn = + M.getOrInsertFunction("_Unwind_SjLj_Resume", + Type::getVoidTy(M.getContext()), + VoidPtrTy, + (Type *)0); + FrameAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::frameaddress); + BuiltinSetjmpFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_setjmp); + LSDAAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_lsda); + SelectorFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_selector); + ExceptionFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_exception); + PersonalityFn = 0; + + return true; +} + +/// markInvokeCallSite - Insert code to mark the call_site for this invoke +void SjLjEHPass::markInvokeCallSite(InvokeInst *II, unsigned InvokeNo, + Value *CallSite, + SwitchInst *CatchSwitch) { + ConstantInt *CallSiteNoC= ConstantInt::get(Type::getInt32Ty(II->getContext()), + InvokeNo); + // The runtime comes back to the dispatcher with the call_site - 1 in + // the 
context. Odd, but there it is. + ConstantInt *SwitchValC = ConstantInt::get(Type::getInt32Ty(II->getContext()), + InvokeNo - 1); + + // If the unwind edge has phi nodes, split the edge. + if (isa<PHINode>(II->getUnwindDest()->begin())) { + SplitCriticalEdge(II, 1, this); + + // If there are any phi nodes left, they must have a single predecessor. + while (PHINode *PN = dyn_cast<PHINode>(II->getUnwindDest()->begin())) { + PN->replaceAllUsesWith(PN->getIncomingValue(0)); + PN->eraseFromParent(); + } + } + + // Insert a store of the invoke num before the invoke and store zero into the + // location afterward. + new StoreInst(CallSiteNoC, CallSite, true, II); // volatile + + // Add a switch case to our unwind block. + CatchSwitch->addCase(SwitchValC, II->getUnwindDest()); + // We still want this to look like an invoke so we emit the LSDA properly + // FIXME: ??? Or will this cause strangeness with mis-matched IDs like + // when it was in the front end? +} + +/// MarkBlocksLiveIn - Insert BB and all of its predescessors into LiveBBs until +/// we reach blocks we've already seen. +static void MarkBlocksLiveIn(BasicBlock *BB, std::set<BasicBlock*> &LiveBBs) { + if (!LiveBBs.insert(BB).second) return; // already been here. + + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) + MarkBlocksLiveIn(*PI, LiveBBs); +} + +/// splitLiveRangesAcrossInvokes - Each value that is live across an unwind edge +/// we spill into a stack location, guaranteeing that there is nothing live +/// across the unwind edge. This process also splits all critical edges +/// coming out of invoke's. +void SjLjEHPass:: +splitLiveRangesLiveAcrossInvokes(SmallVector<InvokeInst*,16> &Invokes) { + // First step, split all critical edges from invoke instructions. 
+ for (unsigned i = 0, e = Invokes.size(); i != e; ++i) { + InvokeInst *II = Invokes[i]; + SplitCriticalEdge(II, 0, this); + SplitCriticalEdge(II, 1, this); + assert(!isa<PHINode>(II->getNormalDest()) && + !isa<PHINode>(II->getUnwindDest()) && + "critical edge splitting left single entry phi nodes?"); + } + + Function *F = Invokes.back()->getParent()->getParent(); + + // To avoid having to handle incoming arguments specially, we lower each arg + // to a copy instruction in the entry block. This ensures that the argument + // value itself cannot be live across the entry block. + BasicBlock::iterator AfterAllocaInsertPt = F->begin()->begin(); + while (isa<AllocaInst>(AfterAllocaInsertPt) && + isa<ConstantInt>(cast<AllocaInst>(AfterAllocaInsertPt)->getArraySize())) + ++AfterAllocaInsertPt; + for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); + AI != E; ++AI) { + // This is always a no-op cast because we're casting AI to AI->getType() so + // src and destination types are identical. BitCast is the only possibility. + CastInst *NC = new BitCastInst( + AI, AI->getType(), AI->getName()+".tmp", AfterAllocaInsertPt); + AI->replaceAllUsesWith(NC); + // Normally its is forbidden to replace a CastInst's operand because it + // could cause the opcode to reflect an illegal conversion. However, we're + // replacing it here with the same value it was constructed with to simply + // make NC its user. + NC->setOperand(0, AI); + } + + // Finally, scan the code looking for instructions with bad live ranges. + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) + for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) { + // Ignore obvious cases we don't have to handle. In particular, most + // instructions either have no uses or only have a single use inside the + // current block. Ignore them quickly. 
+ Instruction *Inst = II; + if (Inst->use_empty()) continue; + if (Inst->hasOneUse() && + cast<Instruction>(Inst->use_back())->getParent() == BB && + !isa<PHINode>(Inst->use_back())) continue; + + // If this is an alloca in the entry block, it's not a real register + // value. + if (AllocaInst *AI = dyn_cast<AllocaInst>(Inst)) + if (isa<ConstantInt>(AI->getArraySize()) && BB == F->begin()) + continue; + + // Avoid iterator invalidation by copying users to a temporary vector. + SmallVector<Instruction*,16> Users; + for (Value::use_iterator UI = Inst->use_begin(), E = Inst->use_end(); + UI != E; ++UI) { + Instruction *User = cast<Instruction>(*UI); + if (User->getParent() != BB || isa<PHINode>(User)) + Users.push_back(User); + } + + // Find all of the blocks that this value is live in. + std::set<BasicBlock*> LiveBBs; + LiveBBs.insert(Inst->getParent()); + while (!Users.empty()) { + Instruction *U = Users.back(); + Users.pop_back(); + + if (!isa<PHINode>(U)) { + MarkBlocksLiveIn(U->getParent(), LiveBBs); + } else { + // Uses for a PHI node occur in their predecessor block. + PHINode *PN = cast<PHINode>(U); + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (PN->getIncomingValue(i) == Inst) + MarkBlocksLiveIn(PN->getIncomingBlock(i), LiveBBs); + } + } + + // Now that we know all of the blocks that this thing is live in, see if + // it includes any of the unwind locations. + bool NeedsSpill = false; + for (unsigned i = 0, e = Invokes.size(); i != e; ++i) { + BasicBlock *UnwindBlock = Invokes[i]->getUnwindDest(); + if (UnwindBlock != BB && LiveBBs.count(UnwindBlock)) { + NeedsSpill = true; + } + } + + // If we decided we need a spill, do it. 
+ if (NeedsSpill) { + ++NumSpilled; + DemoteRegToStack(*Inst, true); + } + } +} + +bool SjLjEHPass::insertSjLjEHSupport(Function &F) { + SmallVector<ReturnInst*,16> Returns; + SmallVector<UnwindInst*,16> Unwinds; + SmallVector<InvokeInst*,16> Invokes; + + // Look through the terminators of the basic blocks to find invokes, returns + // and unwinds + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) + if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) { + // Remember all return instructions in case we insert an invoke into this + // function. + Returns.push_back(RI); + } else if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) { + Invokes.push_back(II); + } else if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) { + Unwinds.push_back(UI); + } + // If we don't have any invokes or unwinds, there's nothing to do. + if (Unwinds.empty() && Invokes.empty()) return false; + + // Find the eh.selector.* and eh.exception calls. We'll use the first + // eh.selector to determine the right personality function to use. For + // SJLJ, we always use the same personality for the whole function, + // not on a per-selector basis. + // FIXME: That's a bit ugly. Better way? + SmallVector<CallInst*,16> EH_Selectors; + SmallVector<CallInst*,16> EH_Exceptions; + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { + if (CallInst *CI = dyn_cast<CallInst>(I)) { + if (CI->getCalledFunction() == SelectorFn) { + if (!PersonalityFn) PersonalityFn = CI->getOperand(2); + EH_Selectors.push_back(CI); + } else if (CI->getCalledFunction() == ExceptionFn) { + EH_Exceptions.push_back(CI); + } + } + } + } + // If we don't have any eh.selector calls, we can't determine the personality + // function. Without a personality function, we can't process exceptions. 
+ if (!PersonalityFn) return false; + + NumInvokes += Invokes.size(); + NumUnwinds += Unwinds.size(); + + if (!Invokes.empty()) { + // We have invokes, so we need to add register/unregister calls to get + // this function onto the global unwind stack. + // + // First thing we need to do is scan the whole function for values that are + // live across unwind edges. Each value that is live across an unwind edge + // we spill into a stack location, guaranteeing that there is nothing live + // across the unwind edge. This process also splits all critical edges + // coming out of invoke's. + splitLiveRangesLiveAcrossInvokes(Invokes); + + BasicBlock *EntryBB = F.begin(); + // Create an alloca for the incoming jump buffer ptr and the new jump buffer + // that needs to be restored on all exits from the function. This is an + // alloca because the value needs to be added to the global context list. + unsigned Align = 4; // FIXME: Should be a TLI check? + AllocaInst *FunctionContext = + new AllocaInst(FunctionContextTy, 0, Align, + "fcn_context", F.begin()->begin()); + + Value *Idxs[2]; + const Type *Int32Ty = Type::getInt32Ty(F.getContext()); + Value *Zero = ConstantInt::get(Int32Ty, 0); + // We need to also keep around a reference to the call_site field + Idxs[0] = Zero; + Idxs[1] = ConstantInt::get(Int32Ty, 1); + CallSite = GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2, + "call_site", + EntryBB->getTerminator()); + + // The exception selector comes back in context->data[1] + Idxs[1] = ConstantInt::get(Int32Ty, 2); + Value *FCData = GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2, + "fc_data", + EntryBB->getTerminator()); + Idxs[1] = ConstantInt::get(Int32Ty, 1); + Value *SelectorAddr = GetElementPtrInst::Create(FCData, Idxs, Idxs+2, + "exc_selector_gep", + EntryBB->getTerminator()); + // The exception value comes back in context->data[0] + Idxs[1] = Zero; + Value *ExceptionAddr = GetElementPtrInst::Create(FCData, Idxs, Idxs+2, + "exception_gep", + 
EntryBB->getTerminator()); + + // The result of the eh.selector call will be replaced with a + // a reference to the selector value returned in the function + // context. We leave the selector itself so the EH analysis later + // can use it. + for (int i = 0, e = EH_Selectors.size(); i < e; ++i) { + CallInst *I = EH_Selectors[i]; + Value *SelectorVal = new LoadInst(SelectorAddr, "select_val", true, I); + I->replaceAllUsesWith(SelectorVal); + } + // eh.exception calls are replaced with references to the proper + // location in the context. Unlike eh.selector, the eh.exception + // calls are removed entirely. + for (int i = 0, e = EH_Exceptions.size(); i < e; ++i) { + CallInst *I = EH_Exceptions[i]; + // Possible for there to be duplicates, so check to make sure + // the instruction hasn't already been removed. + if (!I->getParent()) continue; + Value *Val = new LoadInst(ExceptionAddr, "exception", true, I); + const Type *Ty = Type::getInt8PtrTy(F.getContext()); + Val = CastInst::Create(Instruction::IntToPtr, Val, Ty, "", I); + + I->replaceAllUsesWith(Val); + I->eraseFromParent(); + } + + + + + // The entry block changes to have the eh.sjlj.setjmp, with a conditional + // branch to a dispatch block for non-zero returns. If we return normally, + // we're not handling an exception and just register the function context + // and continue. + + // Create the dispatch block. The dispatch block is basically a big switch + // statement that goes to all of the invoke landing pads. + BasicBlock *DispatchBlock = + BasicBlock::Create(F.getContext(), "eh.sjlj.setjmp.catch", &F); + + // Insert a load in the Catch block, and a switch on its value. By default, + // we go to a block that just does an unwind (which is the correct action + // for a standard call). 
+ BasicBlock *UnwindBlock = BasicBlock::Create(F.getContext(), "unwindbb", &F); + Unwinds.push_back(new UnwindInst(F.getContext(), UnwindBlock)); + + Value *DispatchLoad = new LoadInst(CallSite, "invoke.num", true, + DispatchBlock); + SwitchInst *DispatchSwitch = + SwitchInst::Create(DispatchLoad, UnwindBlock, Invokes.size(), DispatchBlock); + // Split the entry block to insert the conditional branch for the setjmp. + BasicBlock *ContBlock = EntryBB->splitBasicBlock(EntryBB->getTerminator(), + "eh.sjlj.setjmp.cont"); + + // Populate the Function Context + // 1. LSDA address + // 2. Personality function address + // 3. jmpbuf (save FP and call eh.sjlj.setjmp) + + // LSDA address + Idxs[0] = Zero; + Idxs[1] = ConstantInt::get(Int32Ty, 4); + Value *LSDAFieldPtr = + GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2, + "lsda_gep", + EntryBB->getTerminator()); + Value *LSDA = CallInst::Create(LSDAAddrFn, "lsda_addr", + EntryBB->getTerminator()); + new StoreInst(LSDA, LSDAFieldPtr, true, EntryBB->getTerminator()); + + Idxs[1] = ConstantInt::get(Int32Ty, 3); + Value *PersonalityFieldPtr = + GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2, + "lsda_gep", + EntryBB->getTerminator()); + new StoreInst(PersonalityFn, PersonalityFieldPtr, true, + EntryBB->getTerminator()); + + // Save the frame pointer. + Idxs[1] = ConstantInt::get(Int32Ty, 5); + Value *FieldPtr + = GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2, + "jbuf_gep", + EntryBB->getTerminator()); + Idxs[1] = ConstantInt::get(Int32Ty, 0); + Value *ElemPtr = + GetElementPtrInst::Create(FieldPtr, Idxs, Idxs+2, "jbuf_fp_gep", + EntryBB->getTerminator()); + + Value *Val = CallInst::Create(FrameAddrFn, + ConstantInt::get(Int32Ty, 0), + "fp", + EntryBB->getTerminator()); + new StoreInst(Val, ElemPtr, true, EntryBB->getTerminator()); + // Call the setjmp instrinsic. 
It fills in the rest of the jmpbuf + Value *SetjmpArg = + CastInst::Create(Instruction::BitCast, FieldPtr, + Type::getInt8PtrTy(F.getContext()), "", + EntryBB->getTerminator()); + Value *DispatchVal = CallInst::Create(BuiltinSetjmpFn, SetjmpArg, + "dispatch", + EntryBB->getTerminator()); + // check the return value of the setjmp. non-zero goes to dispatcher + Value *IsNormal = new ICmpInst(EntryBB->getTerminator(), + ICmpInst::ICMP_EQ, DispatchVal, Zero, + "notunwind"); + // Nuke the uncond branch. + EntryBB->getTerminator()->eraseFromParent(); + + // Put in a new condbranch in its place. + BranchInst::Create(ContBlock, DispatchBlock, IsNormal, EntryBB); + + // Register the function context and make sure it's known to not throw + CallInst *Register = + CallInst::Create(RegisterFn, FunctionContext, "", + ContBlock->getTerminator()); + Register->setDoesNotThrow(); + + // At this point, we are all set up, update the invoke instructions + // to mark their call_site values, and fill in the dispatch switch + // accordingly. + for (unsigned i = 0, e = Invokes.size(); i != e; ++i) + markInvokeCallSite(Invokes[i], i+1, CallSite, DispatchSwitch); + + // The front end has likely added calls to _Unwind_Resume. We need + // to find those calls and mark the call_site as -1 immediately prior. + // resume is a noreturn function, so any block that has a call to it + // should end in an 'unreachable' instruction with the call immediately + // prior. That's how we'll search. + // ??? There's got to be a better way. this is fugly. 
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) + if ((dyn_cast<UnreachableInst>(BB->getTerminator()))) { + BasicBlock::iterator I = BB->getTerminator(); + // Check the previous instruction and see if it's a resume call + if (I == BB->begin()) continue; + if (CallInst *CI = dyn_cast<CallInst>(--I)) { + if (CI->getCalledFunction() == ResumeFn) { + Value *NegativeOne = Constant::getAllOnesValue(Int32Ty); + new StoreInst(NegativeOne, CallSite, true, I); // volatile + } + } + } + + // Replace all unwinds with a branch to the unwind handler. + // ??? Should this ever happen with sjlj exceptions? + for (unsigned i = 0, e = Unwinds.size(); i != e; ++i) { + BranchInst::Create(UnwindBlock, Unwinds[i]); + Unwinds[i]->eraseFromParent(); + } + + // Finally, for any returns from this function, if this function contains an + // invoke, add a call to unregister the function context. + for (unsigned i = 0, e = Returns.size(); i != e; ++i) + CallInst::Create(UnregisterFn, FunctionContext, "", Returns[i]); + } + + return true; +} + +bool SjLjEHPass::runOnFunction(Function &F) { + bool Res = insertSjLjEHSupport(F); + return Res; +} diff --git a/lib/CodeGen/Spiller.cpp b/lib/CodeGen/Spiller.cpp index 405cd80..0277d64 100644 --- a/lib/CodeGen/Spiller.cpp +++ b/lib/CodeGen/Spiller.cpp @@ -13,12 +13,13 @@ #include "VirtRegMap.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LiveStackAnalysis.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -50,13 +51,13 @@ protected: /// Ensures there is space before the given machine instruction, returns the /// instruction's new number. 
- unsigned makeSpaceBefore(MachineInstr *mi) { + LiveIndex makeSpaceBefore(MachineInstr *mi) { if (!lis->hasGapBeforeInstr(lis->getInstructionIndex(mi))) { lis->scaleNumbering(2); ls->scaleNumbering(2); } - unsigned miIdx = lis->getInstructionIndex(mi); + LiveIndex miIdx = lis->getInstructionIndex(mi); assert(lis->hasGapBeforeInstr(miIdx)); @@ -65,13 +66,13 @@ protected: /// Ensure there is space after the given machine instruction, returns the /// instruction's new number. - unsigned makeSpaceAfter(MachineInstr *mi) { + LiveIndex makeSpaceAfter(MachineInstr *mi) { if (!lis->hasGapAfterInstr(lis->getInstructionIndex(mi))) { lis->scaleNumbering(2); ls->scaleNumbering(2); } - unsigned miIdx = lis->getInstructionIndex(mi); + LiveIndex miIdx = lis->getInstructionIndex(mi); assert(lis->hasGapAfterInstr(miIdx)); @@ -82,19 +83,19 @@ protected: /// after the given instruction. Returns the base index of the inserted /// instruction. The caller is responsible for adding an appropriate /// LiveInterval to the LiveIntervals analysis. - unsigned insertStoreAfter(MachineInstr *mi, unsigned ss, - unsigned vreg, - const TargetRegisterClass *trc) { + LiveIndex insertStoreAfter(MachineInstr *mi, unsigned ss, + unsigned vreg, + const TargetRegisterClass *trc) { MachineBasicBlock::iterator nextInstItr(next(mi)); - unsigned miIdx = makeSpaceAfter(mi); + LiveIndex miIdx = makeSpaceAfter(mi); tii->storeRegToStackSlot(*mi->getParent(), nextInstItr, vreg, true, ss, trc); MachineBasicBlock::iterator storeInstItr(next(mi)); MachineInstr *storeInst = &*storeInstItr; - unsigned storeInstIdx = miIdx + LiveInterval::InstrSlots::NUM; + LiveIndex storeInstIdx = lis->getNextIndex(miIdx); assert(lis->getInstructionFromIndex(storeInstIdx) == 0 && "Store inst index already in use."); @@ -107,15 +108,15 @@ protected: /// Insert a store of the given vreg to the given stack slot immediately /// before the given instructnion. Returns the base index of the inserted /// Instruction. 
- unsigned insertStoreBefore(MachineInstr *mi, unsigned ss, - unsigned vreg, - const TargetRegisterClass *trc) { - unsigned miIdx = makeSpaceBefore(mi); + LiveIndex insertStoreBefore(MachineInstr *mi, unsigned ss, + unsigned vreg, + const TargetRegisterClass *trc) { + LiveIndex miIdx = makeSpaceBefore(mi); tii->storeRegToStackSlot(*mi->getParent(), mi, vreg, true, ss, trc); MachineBasicBlock::iterator storeInstItr(prior(mi)); MachineInstr *storeInst = &*storeInstItr; - unsigned storeInstIdx = miIdx - LiveInterval::InstrSlots::NUM; + LiveIndex storeInstIdx = lis->getPrevIndex(miIdx); assert(lis->getInstructionFromIndex(storeInstIdx) == 0 && "Store inst index already in use."); @@ -130,14 +131,15 @@ protected: unsigned vreg, const TargetRegisterClass *trc) { - unsigned storeInstIdx = insertStoreAfter(mi, ss, vreg, trc); - unsigned start = lis->getDefIndex(lis->getInstructionIndex(mi)), - end = lis->getUseIndex(storeInstIdx); + LiveIndex storeInstIdx = insertStoreAfter(mi, ss, vreg, trc); + LiveIndex start = lis->getDefIndex(lis->getInstructionIndex(mi)), + end = lis->getUseIndex(storeInstIdx); VNInfo *vni = li->getNextValue(storeInstIdx, 0, true, lis->getVNInfoAllocator()); - vni->kills.push_back(storeInstIdx); - DOUT << " Inserting store range: [" << start << ", " << end << ")\n"; + vni->addKill(storeInstIdx); + DEBUG(errs() << " Inserting store range: [" << start + << ", " << end << ")\n"); LiveRange lr(start, end, vni); li->addRange(lr); @@ -147,18 +149,18 @@ protected: /// after the given instruction. Returns the base index of the inserted /// instruction. The caller is responsibel for adding/removing an appropriate /// range vreg's LiveInterval. 
- unsigned insertLoadAfter(MachineInstr *mi, unsigned ss, - unsigned vreg, - const TargetRegisterClass *trc) { + LiveIndex insertLoadAfter(MachineInstr *mi, unsigned ss, + unsigned vreg, + const TargetRegisterClass *trc) { MachineBasicBlock::iterator nextInstItr(next(mi)); - unsigned miIdx = makeSpaceAfter(mi); + LiveIndex miIdx = makeSpaceAfter(mi); tii->loadRegFromStackSlot(*mi->getParent(), nextInstItr, vreg, ss, trc); MachineBasicBlock::iterator loadInstItr(next(mi)); MachineInstr *loadInst = &*loadInstItr; - unsigned loadInstIdx = miIdx + LiveInterval::InstrSlots::NUM; + LiveIndex loadInstIdx = lis->getNextIndex(miIdx); assert(lis->getInstructionFromIndex(loadInstIdx) == 0 && "Store inst index already in use."); @@ -172,15 +174,15 @@ protected: /// before the given instruction. Returns the base index of the inserted /// instruction. The caller is responsible for adding an appropriate /// LiveInterval to the LiveIntervals analysis. - unsigned insertLoadBefore(MachineInstr *mi, unsigned ss, - unsigned vreg, - const TargetRegisterClass *trc) { - unsigned miIdx = makeSpaceBefore(mi); + LiveIndex insertLoadBefore(MachineInstr *mi, unsigned ss, + unsigned vreg, + const TargetRegisterClass *trc) { + LiveIndex miIdx = makeSpaceBefore(mi); tii->loadRegFromStackSlot(*mi->getParent(), mi, vreg, ss, trc); MachineBasicBlock::iterator loadInstItr(prior(mi)); MachineInstr *loadInst = &*loadInstItr; - unsigned loadInstIdx = miIdx - LiveInterval::InstrSlots::NUM; + LiveIndex loadInstIdx = lis->getPrevIndex(miIdx); assert(lis->getInstructionFromIndex(loadInstIdx) == 0 && "Load inst index already in use."); @@ -195,14 +197,15 @@ protected: unsigned vreg, const TargetRegisterClass *trc) { - unsigned loadInstIdx = insertLoadBefore(mi, ss, vreg, trc); - unsigned start = lis->getDefIndex(loadInstIdx), - end = lis->getUseIndex(lis->getInstructionIndex(mi)); + LiveIndex loadInstIdx = insertLoadBefore(mi, ss, vreg, trc); + LiveIndex start = lis->getDefIndex(loadInstIdx), + end = 
lis->getUseIndex(lis->getInstructionIndex(mi)); VNInfo *vni = li->getNextValue(loadInstIdx, 0, true, lis->getVNInfoAllocator()); - vni->kills.push_back(lis->getInstructionIndex(mi)); - DOUT << " Intserting load range: [" << start << ", " << end << ")\n"; + vni->addKill(lis->getInstructionIndex(mi)); + DEBUG(errs() << " Intserting load range: [" << start + << ", " << end << ")\n"); LiveRange lr(start, end, vni); li->addRange(lr); @@ -214,7 +217,7 @@ protected: /// immediately before each use, and stores after each def. No folding is /// attempted. std::vector<LiveInterval*> trivialSpillEverywhere(LiveInterval *li) { - DOUT << "Spilling everywhere " << *li << "\n"; + DEBUG(errs() << "Spilling everywhere " << *li << "\n"); assert(li->weight != HUGE_VALF && "Attempting to spill already spilled value."); @@ -222,7 +225,7 @@ protected: assert(!li->isStackSlot() && "Trying to spill a stack slot."); - DOUT << "Trivial spill everywhere of reg" << li->reg << "\n"; + DEBUG(errs() << "Trivial spill everywhere of reg" << li->reg << "\n"); std::vector<LiveInterval*> added; @@ -234,7 +237,7 @@ protected: MachineInstr *mi = &*regItr; - DOUT << " Processing " << *mi; + DEBUG(errs() << " Processing " << *mi); do { ++regItr; @@ -318,23 +321,21 @@ public: vrm->assignVirt2StackSlot(li->reg, ss); MachineInstr *mi = 0; - unsigned storeIdx = 0; + LiveIndex storeIdx = LiveIndex(); if (valno->isDefAccurate()) { // If we have an accurate def we can just grab an iterator to the instr // after the def. mi = lis->getInstructionFromIndex(valno->def); - storeIdx = insertStoreAfter(mi, ss, li->reg, trc) + - LiveInterval::InstrSlots::DEF; + storeIdx = lis->getDefIndex(insertStoreAfter(mi, ss, li->reg, trc)); } else { // if we get here we have a PHI def. 
mi = &lis->getMBBFromIndex(valno->def)->front(); - storeIdx = insertStoreBefore(mi, ss, li->reg, trc) + - LiveInterval::InstrSlots::DEF; + storeIdx = lis->getDefIndex(insertStoreBefore(mi, ss, li->reg, trc)); } MachineBasicBlock *defBlock = mi->getParent(); - unsigned loadIdx = 0; + LiveIndex loadIdx = LiveIndex(); // Now we need to find the load... MachineBasicBlock::iterator useItr(mi); @@ -342,13 +343,11 @@ public: if (useItr != defBlock->end()) { MachineInstr *loadInst = useItr; - loadIdx = insertLoadBefore(loadInst, ss, li->reg, trc) + - LiveInterval::InstrSlots::USE; + loadIdx = lis->getUseIndex(insertLoadBefore(loadInst, ss, li->reg, trc)); } else { MachineInstr *loadInst = &defBlock->back(); - loadIdx = insertLoadAfter(loadInst, ss, li->reg, trc) + - LiveInterval::InstrSlots::USE; + loadIdx = lis->getUseIndex(insertLoadAfter(loadInst, ss, li->reg, trc)); } li->removeRange(storeIdx, loadIdx, true); diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp index c179f1e..350bc6e 100644 --- a/lib/CodeGen/StackProtector.cpp +++ b/lib/CodeGen/StackProtector.cpp @@ -148,7 +148,8 @@ bool StackProtector::InsertStackProtectors() { // StackGuard = load __stack_chk_guard // call void @llvm.stackprotect.create(StackGuard, StackGuardSlot) // - PointerType *PtrTy = PointerType::getUnqual(Type::Int8Ty); + PointerType *PtrTy = PointerType::getUnqual( + Type::getInt8Ty(RI->getContext())); StackGuardVar = M->getOrInsertGlobal("__stack_chk_guard", PtrTy); BasicBlock &Entry = F->getEntryBlock(); @@ -201,7 +202,7 @@ bool StackProtector::InsertStackProtectors() { // Generate the stack protector instructions in the old basic block. 
LoadInst *LI1 = new LoadInst(StackGuardVar, "", false, BB); LoadInst *LI2 = new LoadInst(AI, "", true, BB); - ICmpInst *Cmp = new ICmpInst(CmpInst::ICMP_EQ, LI1, LI2, "", BB); + ICmpInst *Cmp = new ICmpInst(*BB, CmpInst::ICMP_EQ, LI1, LI2, ""); BranchInst::Create(NewBB, FailBB, Cmp, BB); } @@ -215,10 +216,12 @@ bool StackProtector::InsertStackProtectors() { /// CreateFailBB - Create a basic block to jump to when the stack protector /// check fails. BasicBlock *StackProtector::CreateFailBB() { - BasicBlock *FailBB = BasicBlock::Create("CallStackCheckFailBlk", F); + BasicBlock *FailBB = BasicBlock::Create(F->getContext(), + "CallStackCheckFailBlk", F); Constant *StackChkFail = - M->getOrInsertFunction("__stack_chk_fail", Type::VoidTy, NULL); + M->getOrInsertFunction("__stack_chk_fail", + Type::getVoidTy(F->getContext()), NULL); CallInst::Create(StackChkFail, "", FailBB); - new UnreachableInst(FailBB); + new UnreachableInst(F->getContext(), FailBB); return FailBB; } diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp index 5824644..fad0808 100644 --- a/lib/CodeGen/StackSlotColoring.cpp +++ b/lib/CodeGen/StackSlotColoring.cpp @@ -18,6 +18,7 @@ #include "llvm/CodeGen/LiveStackAnalysis.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/Support/CommandLine.h" @@ -97,6 +98,7 @@ namespace { MachineFunctionPass(&ID), ColorWithRegs(RegColor), NextColor(-1) {} virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); AU.addRequired<LiveStacks>(); AU.addRequired<VirtRegMap>(); AU.addPreserved<VirtRegMap>(); @@ -197,7 +199,7 @@ void StackSlotColoring::InitializeSlots() { Assignments.resize(LastFI); // Gather all spill slots into a list. 
- DOUT << "Spill slot intervals:\n"; + DEBUG(errs() << "Spill slot intervals:\n"); for (LiveStacks::iterator i = LS->begin(), e = LS->end(); i != e; ++i) { LiveInterval &li = i->second; DEBUG(li.dump()); @@ -209,7 +211,7 @@ void StackSlotColoring::InitializeSlots() { OrigSizes[FI] = MFI->getObjectSize(FI); AllColors.set(FI); } - DOUT << '\n'; + DEBUG(errs() << '\n'); // Sort them by weight. std::stable_sort(SSIntervals.begin(), SSIntervals.end(), IntervalSorter()); @@ -241,7 +243,7 @@ StackSlotColoring::ColorSlotsWithFreeRegs(SmallVector<int, 16> &SlotMapping, return false; bool Changed = false; - DOUT << "Assigning unused registers to spill slots:\n"; + DEBUG(errs() << "Assigning unused registers to spill slots:\n"); for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) { LiveInterval *li = SSIntervals[i]; int SS = li->getStackSlotIndex(); @@ -271,7 +273,8 @@ StackSlotColoring::ColorSlotsWithFreeRegs(SmallVector<int, 16> &SlotMapping, AllColored = false; continue; } else { - DOUT << "Assigning fi#" << RSS << " to " << TRI->getName(Reg) << '\n'; + DEBUG(errs() << "Assigning fi#" << RSS << " to " + << TRI->getName(Reg) << '\n'); ColoredRegs.push_back(Reg); SlotMapping[RSS] = Reg; SlotIsReg.set(RSS); @@ -298,7 +301,7 @@ StackSlotColoring::ColorSlotsWithFreeRegs(SmallVector<int, 16> &SlotMapping, ++NumEliminated; } } - DOUT << '\n'; + DEBUG(errs() << '\n'); return Changed; } @@ -333,7 +336,7 @@ int StackSlotColoring::ColorSlot(LiveInterval *li) { // Record the assignment. Assignments[Color].push_back(li); int FI = li->getStackSlotIndex(); - DOUT << "Assigning fi#" << FI << " to fi#" << Color << "\n"; + DEBUG(errs() << "Assigning fi#" << FI << " to fi#" << Color << "\n"); // Change size and alignment of the allocated slot. 
If there are multiple // objects sharing the same slot, then make sure the size and alignment @@ -357,7 +360,7 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) { BitVector SlotIsReg(NumObjs); BitVector UsedColors(NumObjs); - DOUT << "Color spill slot intervals:\n"; + DEBUG(errs() << "Color spill slot intervals:\n"); bool Changed = false; for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) { LiveInterval *li = SSIntervals[i]; @@ -371,7 +374,7 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) { Changed |= (SS != NewSS); } - DOUT << "\nSpill slots after coloring:\n"; + DEBUG(errs() << "\nSpill slots after coloring:\n"); for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) { LiveInterval *li = SSIntervals[i]; int SS = li->getStackSlotIndex(); @@ -383,7 +386,7 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) { #ifndef NDEBUG for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) DEBUG(SSIntervals[i]->dump()); - DOUT << '\n'; + DEBUG(errs() << '\n'); #endif // Can we "color" a stack slot with a unused register? @@ -415,7 +418,7 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) { // Delete unused stack slots. while (NextColor != -1) { - DOUT << "Removing unused stack object fi#" << NextColor << "\n"; + DEBUG(errs() << "Removing unused stack object fi#" << NextColor << "\n"); MFI->RemoveStackObject(NextColor); NextColor = AllColors.find_next(NextColor); } @@ -449,6 +452,7 @@ bool StackSlotColoring::AllMemRefsCanBeUnfolded(int SS) { /// to old frame index with new one. void StackSlotColoring::RewriteInstruction(MachineInstr *MI, int OldFI, int NewFI, MachineFunction &MF) { + // Update the operands. for (unsigned i = 0, ee = MI->getNumOperands(); i != ee; ++i) { MachineOperand &MO = MI->getOperand(i); if (!MO.isFI()) @@ -459,22 +463,15 @@ void StackSlotColoring::RewriteInstruction(MachineInstr *MI, int OldFI, MO.setIndex(NewFI); } - // Update the MachineMemOperand for the new memory location. 
- // FIXME: We need a better method of managing these too. - SmallVector<MachineMemOperand, 2> MMOs(MI->memoperands_begin(), - MI->memoperands_end()); - MI->clearMemOperands(MF); + // Update the memory references. This changes the MachineMemOperands + // directly. They may be in use by multiple instructions, however all + // instructions using OldFI are being rewritten to use NewFI. const Value *OldSV = PseudoSourceValue::getFixedStack(OldFI); - for (unsigned i = 0, ee = MMOs.size(); i != ee; ++i) { - if (MMOs[i].getValue() != OldSV) - MI->addMemOperand(MF, MMOs[i]); - else { - MachineMemOperand MMO(PseudoSourceValue::getFixedStack(NewFI), - MMOs[i].getFlags(), MMOs[i].getOffset(), - MMOs[i].getSize(), MMOs[i].getAlignment()); - MI->addMemOperand(MF, MMO); - } - } + const Value *NewSV = PseudoSourceValue::getFixedStack(NewFI); + for (MachineInstr::mmo_iterator I = MI->memoperands_begin(), + E = MI->memoperands_end(); I != E; ++I) + if ((*I)->getValue() == OldSV) + (*I)->setValue(NewSV); } /// PropagateBackward - Traverse backward and look for the definition of @@ -503,7 +500,16 @@ bool StackSlotColoring::PropagateBackward(MachineBasicBlock::iterator MII, if (Reg == OldReg) { if (MO.isImplicit()) return false; - const TargetRegisterClass *RC = getInstrOperandRegClass(TRI, TID, i); + + // Abort the use is actually a sub-register def. We don't have enough + // information to figure out if it is really legal. 
+ if (MO.getSubReg() || + TID.getOpcode() == TargetInstrInfo::EXTRACT_SUBREG || + TID.getOpcode() == TargetInstrInfo::INSERT_SUBREG || + TID.getOpcode() == TargetInstrInfo::SUBREG_TO_REG) + return false; + + const TargetRegisterClass *RC = TID.OpInfo[i].getRegClass(TRI); if (RC && !RC->contains(NewReg)) return false; @@ -547,7 +553,6 @@ bool StackSlotColoring::PropagateForward(MachineBasicBlock::iterator MII, SmallVector<MachineOperand*, 4> Uses; while (++MII != MBB->end()) { - bool FoundUse = false; bool FoundKill = false; const TargetInstrDesc &TID = MII->getDesc(); for (unsigned i = 0, e = MII->getNumOperands(); i != e; ++i) { @@ -561,12 +566,18 @@ bool StackSlotColoring::PropagateForward(MachineBasicBlock::iterator MII, if (MO.isDef() || MO.isImplicit()) return false; - const TargetRegisterClass *RC = getInstrOperandRegClass(TRI, TID, i); + // Abort the use is actually a sub-register use. We don't have enough + // information to figure out if it is really legal. + if (MO.getSubReg() || + TID.getOpcode() == TargetInstrInfo::EXTRACT_SUBREG) + return false; + + const TargetRegisterClass *RC = TID.OpInfo[i].getRegClass(TRI); if (RC && !RC->contains(NewReg)) return false; - FoundUse = true; if (MO.isKill()) FoundKill = true; + Uses.push_back(&MO); } else if (TRI->regsOverlap(Reg, NewReg) || TRI->regsOverlap(Reg, OldReg)) @@ -593,7 +604,7 @@ StackSlotColoring::UnfoldAndRewriteInstruction(MachineInstr *MI, int OldFI, MachineBasicBlock *MBB = MI->getParent(); if (unsigned DstReg = TII->isLoadFromStackSlot(MI, OldFI)) { if (PropagateForward(MI, MBB, DstReg, Reg)) { - DOUT << "Eliminated load: "; + DEBUG(errs() << "Eliminated load: "); DEBUG(MI->dump()); ++NumLoadElim; } else { @@ -609,7 +620,7 @@ StackSlotColoring::UnfoldAndRewriteInstruction(MachineInstr *MI, int OldFI, } } else if (unsigned SrcReg = TII->isStoreToStackSlot(MI, OldFI)) { if (MI->killsRegister(SrcReg) && PropagateBackward(MI, MBB, SrcReg, Reg)) { - DOUT << "Eliminated store: "; + DEBUG(errs() << 
"Eliminated store: "); DEBUG(MI->dump()); ++NumStoreElim; } else { @@ -687,7 +698,7 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) { bool StackSlotColoring::runOnMachineFunction(MachineFunction &MF) { - DOUT << "********** Stack Slot Coloring **********\n"; + DEBUG(errs() << "********** Stack Slot Coloring **********\n"); MFI = MF.getFrameInfo(); MRI = &MF.getRegInfo(); diff --git a/lib/CodeGen/StrongPHIElimination.cpp b/lib/CodeGen/StrongPHIElimination.cpp index ca99528..48d6dc1 100644 --- a/lib/CodeGen/StrongPHIElimination.cpp +++ b/lib/CodeGen/StrongPHIElimination.cpp @@ -71,6 +71,7 @@ namespace { bool runOnMachineFunction(MachineFunction &Fn); virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); AU.addRequired<MachineDominatorTree>(); AU.addRequired<LiveIntervals>(); @@ -294,7 +295,7 @@ StrongPHIElimination::computeDomForest( static bool isLiveIn(unsigned r, MachineBasicBlock* MBB, LiveIntervals& LI) { LiveInterval& I = LI.getOrCreateInterval(r); - unsigned idx = LI.getMBBStartIdx(MBB); + LiveIndex idx = LI.getMBBStartIdx(MBB); return I.liveAt(idx); } @@ -427,7 +428,7 @@ void StrongPHIElimination::processBlock(MachineBasicBlock* MBB) { } LiveInterval& PI = LI.getOrCreateInterval(DestReg); - unsigned pIdx = LI.getDefIndex(LI.getInstructionIndex(P)); + LiveIndex pIdx = LI.getDefIndex(LI.getInstructionIndex(P)); VNInfo* PVN = PI.getLiveRangeContaining(pIdx)->valno; PhiValueNumber.insert(std::make_pair(DestReg, PVN->id)); @@ -553,8 +554,8 @@ void StrongPHIElimination::processBlock(MachineBasicBlock* MBB) { // Add the renaming set for this PHI node to our overall renaming information for (std::map<unsigned, MachineBasicBlock*>::iterator QI = PHIUnion.begin(), QE = PHIUnion.end(); QI != QE; ++QI) { - DOUT << "Adding Renaming: " << QI->first << " -> " - << P->getOperand(0).getReg() << "\n"; + DEBUG(errs() << "Adding Renaming: " << QI->first << " -> " + << P->getOperand(0).getReg() << "\n"); } 
RenameSets.insert(std::make_pair(P->getOperand(0).getReg(), PHIUnion)); @@ -696,7 +697,8 @@ void StrongPHIElimination::ScheduleCopies(MachineBasicBlock* MBB, TII->copyRegToReg(*PI->getParent(), PI, t, curr.second, RC, RC); - DOUT << "Inserted copy from " << curr.second << " to " << t << "\n"; + DEBUG(errs() << "Inserted copy from " << curr.second << " to " << t + << "\n"); // Push temporary on Stacks Stacks[curr.second].push_back(t); @@ -712,8 +714,8 @@ void StrongPHIElimination::ScheduleCopies(MachineBasicBlock* MBB, TII->copyRegToReg(*MBB, MBB->getFirstTerminator(), curr.second, map[curr.first], RC, RC); map[curr.first] = curr.second; - DOUT << "Inserted copy from " << curr.first << " to " - << curr.second << "\n"; + DEBUG(errs() << "Inserted copy from " << curr.first << " to " + << curr.second << "\n"); // Push this copy onto InsertedPHICopies so we can // update LiveIntervals with it. @@ -746,7 +748,7 @@ void StrongPHIElimination::ScheduleCopies(MachineBasicBlock* MBB, LiveInterval& I = LI.getInterval(curr.second); MachineBasicBlock::iterator term = MBB->getFirstTerminator(); - unsigned endIdx = 0; + LiveIndex endIdx = LiveIndex(); if (term != MBB->end()) endIdx = LI.getInstructionIndex(term); else @@ -782,16 +784,15 @@ void StrongPHIElimination::ScheduleCopies(MachineBasicBlock* MBB, InsertedPHIDests.begin(), E = InsertedPHIDests.end(); I != E; ++I) { if (RegHandled.insert(I->first).second) { LiveInterval& Int = LI.getOrCreateInterval(I->first); - unsigned instrIdx = LI.getInstructionIndex(I->second); - if (Int.liveAt(LiveIntervals::getDefIndex(instrIdx))) - Int.removeRange(LiveIntervals::getDefIndex(instrIdx), - LI.getMBBEndIdx(I->second->getParent())+1, + LiveIndex instrIdx = LI.getInstructionIndex(I->second); + if (Int.liveAt(LI.getDefIndex(instrIdx))) + Int.removeRange(LI.getDefIndex(instrIdx), + LI.getNextSlot(LI.getMBBEndIdx(I->second->getParent())), true); LiveRange R = LI.addLiveRangeToEndOfBlock(I->first, I->second); - R.valno->copy = I->second; - 
R.valno->def = - LiveIntervals::getDefIndex(LI.getInstructionIndex(I->second)); + R.valno->setCopy(I->second); + R.valno->def = LI.getDefIndex(LI.getInstructionIndex(I->second)); } } } @@ -817,7 +818,7 @@ void StrongPHIElimination::InsertCopies(MachineDomTreeNode* MDTN, // Remove the live range for the old vreg. LiveInterval& OldInt = LI.getInterval(I->getOperand(i).getReg()); LiveInterval::iterator OldLR = OldInt.FindLiveRangeContaining( - LiveIntervals::getUseIndex(LI.getInstructionIndex(I))); + LI.getUseIndex(LI.getInstructionIndex(I))); if (OldLR != OldInt.end()) OldInt.removeRange(*OldLR, true); @@ -829,11 +830,11 @@ void StrongPHIElimination::InsertCopies(MachineDomTreeNode* MDTN, VNInfo* FirstVN = *Int.vni_begin(); FirstVN->setHasPHIKill(false); if (I->getOperand(i).isKill()) - FirstVN->kills.push_back( - LiveIntervals::getUseIndex(LI.getInstructionIndex(I))); + FirstVN->addKill( + LI.getUseIndex(LI.getInstructionIndex(I))); LiveRange LR (LI.getMBBStartIdx(I->getParent()), - LiveIntervals::getUseIndex(LI.getInstructionIndex(I))+1, + LI.getNextSlot(LI.getUseIndex(LI.getInstructionIndex(I))), FirstVN); Int.addRange(LR); @@ -868,8 +869,8 @@ bool StrongPHIElimination::mergeLiveIntervals(unsigned primary, for (LiveInterval::iterator I = RHS.begin(), E = RHS.end(); I != E; ++I) { LiveRange R = *I; - unsigned Start = R.start; - unsigned End = R.end; + LiveIndex Start = R.start; + LiveIndex End = R.end; if (LHS.getLiveRangeContaining(Start)) return false; @@ -927,7 +928,8 @@ bool StrongPHIElimination::runOnMachineFunction(MachineFunction &Fn) { unsigned reg = OI->first; ++OI; I->second.erase(reg); - DOUT << "Removing Renaming: " << reg << " -> " << I->first << "\n"; + DEBUG(errs() << "Removing Renaming: " << reg << " -> " << I->first + << "\n"); } } } @@ -944,7 +946,7 @@ bool StrongPHIElimination::runOnMachineFunction(MachineFunction &Fn) { while (I->second.size()) { std::map<unsigned, MachineBasicBlock*>::iterator SI = I->second.begin(); - DOUT << "Renaming: " << 
SI->first << " -> " << I->first << "\n"; + DEBUG(errs() << "Renaming: " << SI->first << " -> " << I->first << "\n"); if (SI->first != I->first) { if (mergeLiveIntervals(I->first, SI->first)) { @@ -965,19 +967,19 @@ bool StrongPHIElimination::runOnMachineFunction(MachineFunction &Fn) { LI.computeNumbering(); LiveInterval& Int = LI.getOrCreateInterval(I->first); - unsigned instrIdx = + LiveIndex instrIdx = LI.getInstructionIndex(--SI->second->getFirstTerminator()); - if (Int.liveAt(LiveIntervals::getDefIndex(instrIdx))) - Int.removeRange(LiveIntervals::getDefIndex(instrIdx), - LI.getMBBEndIdx(SI->second)+1, true); + if (Int.liveAt(LI.getDefIndex(instrIdx))) + Int.removeRange(LI.getDefIndex(instrIdx), + LI.getNextSlot(LI.getMBBEndIdx(SI->second)), true); LiveRange R = LI.addLiveRangeToEndOfBlock(I->first, --SI->second->getFirstTerminator()); - R.valno->copy = --SI->second->getFirstTerminator(); - R.valno->def = LiveIntervals::getDefIndex(instrIdx); + R.valno->setCopy(--SI->second->getFirstTerminator()); + R.valno->def = LI.getDefIndex(instrIdx); - DOUT << "Renaming failed: " << SI->first << " -> " - << I->first << "\n"; + DEBUG(errs() << "Renaming failed: " << SI->first << " -> " + << I->first << "\n"); } } @@ -1009,7 +1011,7 @@ bool StrongPHIElimination::runOnMachineFunction(MachineFunction &Fn) { if (PI.containsOneValue()) { LI.removeInterval(DestReg); } else { - unsigned idx = LI.getDefIndex(LI.getInstructionIndex(PInstr)); + LiveIndex idx = LI.getDefIndex(LI.getInstructionIndex(PInstr)); PI.removeRange(*PI.getLiveRangeContaining(idx), true); } } else { @@ -1023,8 +1025,7 @@ bool StrongPHIElimination::runOnMachineFunction(MachineFunction &Fn) { LiveInterval& InputI = LI.getInterval(reg); if (MBB != PInstr->getParent() && InputI.liveAt(LI.getMBBStartIdx(PInstr->getParent())) && - InputI.expiredAt(LI.getInstructionIndex(PInstr) + - LiveInterval::InstrSlots::NUM)) + InputI.expiredAt(LI.getNextIndex(LI.getInstructionIndex(PInstr)))) 
InputI.removeRange(LI.getMBBStartIdx(PInstr->getParent()), LI.getInstructionIndex(PInstr), true); @@ -1032,7 +1033,7 @@ bool StrongPHIElimination::runOnMachineFunction(MachineFunction &Fn) { // If the PHI is not dead, then the valno defined by the PHI // now has an unknown def. - unsigned idx = LI.getDefIndex(LI.getInstructionIndex(PInstr)); + LiveIndex idx = LI.getDefIndex(LI.getInstructionIndex(PInstr)); const LiveRange* PLR = PI.getLiveRangeContaining(idx); PLR->valno->setIsPHIDef(true); LiveRange R (LI.getMBBStartIdx(PInstr->getParent()), diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp index b759599..c646869 100644 --- a/lib/CodeGen/TargetInstrInfoImpl.cpp +++ b/lib/CodeGen/TargetInstrInfoImpl.cpp @@ -13,21 +13,35 @@ //===----------------------------------------------------------------------===// #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; // commuteInstruction - The default implementation of this method just exchanges -// operand 1 and 2. +// the two operands returned by findCommutedOpIndices. MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI, bool NewMI) const { const TargetInstrDesc &TID = MI->getDesc(); bool HasDef = TID.getNumDefs(); - unsigned Idx1 = HasDef ? 1 : 0; - unsigned Idx2 = HasDef ? 2 : 1; + if (HasDef && !MI->getOperand(0).isReg()) + // No idea how to commute this instruction. Target should implement its own. 
+ return 0; + unsigned Idx1, Idx2; + if (!findCommutedOpIndices(MI, Idx1, Idx2)) { + std::string msg; + raw_string_ostream Msg(msg); + Msg << "Don't know how to commute: " << *MI; + llvm_report_error(Msg.str()); + } assert(MI->getOperand(Idx1).isReg() && MI->getOperand(Idx2).isReg() && "This only knows how to commute register operands so far"); @@ -70,26 +84,24 @@ MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI, return MI; } -/// CommuteChangesDestination - Return true if commuting the specified -/// instruction will also changes the destination operand. Also return the -/// current operand index of the would be new destination register by -/// reference. This can happen when the commutable instruction is also a -/// two-address instruction. -bool TargetInstrInfoImpl::CommuteChangesDestination(MachineInstr *MI, - unsigned &OpIdx) const{ +/// findCommutedOpIndices - If specified MI is commutable, return the two +/// operand indices that would swap value. Return true if the instruction +/// is not in a form which this routine understands. +bool TargetInstrInfoImpl::findCommutedOpIndices(MachineInstr *MI, + unsigned &SrcOpIdx1, + unsigned &SrcOpIdx2) const { const TargetInstrDesc &TID = MI->getDesc(); - if (!TID.getNumDefs()) + if (!TID.isCommutable()) return false; - assert(MI->getOperand(1).isReg() && MI->getOperand(2).isReg() && - "This only knows how to commute register operands so far"); - if (MI->getOperand(0).getReg() == MI->getOperand(1).getReg()) { - // Must be two address instruction! - assert(MI->getDesc().getOperandConstraint(0, TOI::TIED_TO) && - "Expecting a two-address instruction!"); - OpIdx = 2; - return true; - } - return false; + // This assumes v0 = op v1, v2 and commuting would swap v1 and v2. If this + // is not true, then the target must implement this. + SrcOpIdx1 = TID.getNumDefs(); + SrcOpIdx2 = SrcOpIdx1 + 1; + if (!MI->getOperand(SrcOpIdx1).isReg() || + !MI->getOperand(SrcOpIdx2).isReg()) + // No idea. 
+ return false; + return true; } @@ -122,9 +134,12 @@ bool TargetInstrInfoImpl::PredicateInstruction(MachineInstr *MI, void TargetInstrInfoImpl::reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, + unsigned SubIdx, const MachineInstr *Orig) const { MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig); - MI->getOperand(0).setReg(DestReg); + MachineOperand &MO = MI->getOperand(0); + MO.setReg(DestReg); + MO.setSubReg(SubIdx); MBB.insert(I, MI); } @@ -171,11 +186,11 @@ TargetInstrInfo::foldMemoryOperand(MachineFunction &MF, "Folded a use to a non-load!"); const MachineFrameInfo &MFI = *MF.getFrameInfo(); assert(MFI.getObjectOffset(FrameIndex) != -1); - MachineMemOperand MMO(PseudoSourceValue::getFixedStack(FrameIndex), - Flags, - MFI.getObjectOffset(FrameIndex), - MFI.getObjectSize(FrameIndex), - MFI.getObjectAlignment(FrameIndex)); + MachineMemOperand *MMO = + MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FrameIndex), + Flags, /*Offset=*/0, + MFI.getObjectSize(FrameIndex), + MFI.getObjectAlignment(FrameIndex)); NewMI->addMemOperand(MF, MMO); return NewMI; @@ -200,9 +215,93 @@ TargetInstrInfo::foldMemoryOperand(MachineFunction &MF, if (!NewMI) return 0; // Copy the memoperands from the load to the folded instruction. - for (std::list<MachineMemOperand>::iterator I = LoadMI->memoperands_begin(), - E = LoadMI->memoperands_end(); I != E; ++I) - NewMI->addMemOperand(MF, *I); + NewMI->setMemRefs(LoadMI->memoperands_begin(), + LoadMI->memoperands_end()); return NewMI; } + +bool +TargetInstrInfo::isReallyTriviallyReMaterializableGeneric(const MachineInstr * + MI, + AliasAnalysis * + AA) const { + const MachineFunction &MF = *MI->getParent()->getParent(); + const MachineRegisterInfo &MRI = MF.getRegInfo(); + const TargetMachine &TM = MF.getTarget(); + const TargetInstrInfo &TII = *TM.getInstrInfo(); + const TargetRegisterInfo &TRI = *TM.getRegisterInfo(); + + // A load from a fixed stack slot can be rematerialized. 
This may be + // redundant with subsequent checks, but it's target-independent, + // simple, and a common case. + int FrameIdx = 0; + if (TII.isLoadFromStackSlot(MI, FrameIdx) && + MF.getFrameInfo()->isImmutableObjectIndex(FrameIdx)) + return true; + + const TargetInstrDesc &TID = MI->getDesc(); + + // Avoid instructions obviously unsafe for remat. + if (TID.hasUnmodeledSideEffects() || TID.isNotDuplicable() || + TID.mayStore()) + return false; + + // Avoid instructions which load from potentially varying memory. + if (TID.mayLoad() && !MI->isInvariantLoad(AA)) + return false; + + // If any of the registers accessed are non-constant, conservatively assume + // the instruction is not rematerializable. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (Reg == 0) + continue; + + // Check for a well-behaved physical register. + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + if (MO.isUse()) { + // If the physreg has no defs anywhere, it's just an ambient register + // and we can freely move its uses. Alternatively, if it's allocatable, + // it could get allocated to something with a def during allocation. + if (!MRI.def_empty(Reg)) + return false; + BitVector AllocatableRegs = TRI.getAllocatableSet(MF, 0); + if (AllocatableRegs.test(Reg)) + return false; + // Check for a def among the register's aliases too. + for (const unsigned *Alias = TRI.getAliasSet(Reg); *Alias; ++Alias) { + unsigned AliasReg = *Alias; + if (!MRI.def_empty(AliasReg)) + return false; + if (AllocatableRegs.test(AliasReg)) + return false; + } + } else { + // A physreg def. We can't remat it. + return false; + } + continue; + } + + // Only allow one virtual-register def, and that in the first operand. + if (MO.isDef() != (i == 0)) + return false; + + // For the def, it should be the only def of that register. 
+ if (MO.isDef() && (next(MRI.def_begin(Reg)) != MRI.def_end() || + MRI.isLiveIn(Reg))) + return false; + + // Don't allow any virtual-register uses. Rematting an instruction with + // virtual register uses would length the live ranges of the uses, which + // is not necessarily a good idea, certainly not "trivial". + if (MO.isUse()) + return false; + } + + // Everything checked out. + return true; +} diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index 3c40404..a5a0f5b 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -34,6 +34,7 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" @@ -62,6 +63,7 @@ namespace { const TargetRegisterInfo *TRI; MachineRegisterInfo *MRI; LiveVariables *LV; + AliasAnalysis *AA; // DistanceMap - Keep track the distance of a MI from the start of the // current basic block. 
@@ -106,13 +108,31 @@ namespace { MachineFunction::iterator &mbbi, unsigned RegB, unsigned Dist); + typedef std::pair<std::pair<unsigned, bool>, MachineInstr*> NewKill; + bool canUpdateDeletedKills(SmallVector<unsigned, 4> &Kills, + SmallVector<NewKill, 4> &NewKills, + MachineBasicBlock *MBB, unsigned Dist); + bool DeleteUnusedInstr(MachineBasicBlock::iterator &mi, + MachineBasicBlock::iterator &nmi, + MachineFunction::iterator &mbbi, + unsigned regB, unsigned regBIdx, unsigned Dist); + + bool TryInstructionTransform(MachineBasicBlock::iterator &mi, + MachineBasicBlock::iterator &nmi, + MachineFunction::iterator &mbbi, + unsigned SrcIdx, unsigned DstIdx, + unsigned Dist); + void ProcessCopy(MachineInstr *MI, MachineBasicBlock *MBB, SmallPtrSet<MachineInstr*, 8> &Processed); + public: static char ID; // Pass identification, replacement for typeid TwoAddressInstructionPass() : MachineFunctionPass(&ID) {} virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequired<AliasAnalysis>(); AU.addPreserved<LiveVariables>(); AU.addPreservedID(MachineLoopInfoID); AU.addPreservedID(MachineDominatorsID); @@ -143,7 +163,7 @@ bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB, MachineBasicBlock::iterator OldPos) { // Check if it's safe to move this instruction. bool SeenStore = true; // Be conservative. 
- if (!MI->isSafeToMove(TII, SeenStore)) + if (!MI->isSafeToMove(TII, SeenStore, AA)) return false; unsigned DefReg = 0; @@ -556,15 +576,15 @@ TwoAddressInstructionPass::CommuteInstruction(MachineBasicBlock::iterator &mi, MachineFunction::iterator &mbbi, unsigned RegB, unsigned RegC, unsigned Dist) { MachineInstr *MI = mi; - DOUT << "2addr: COMMUTING : " << *MI; + DEBUG(errs() << "2addr: COMMUTING : " << *MI); MachineInstr *NewMI = TII->commuteInstruction(MI); if (NewMI == 0) { - DOUT << "2addr: COMMUTING FAILED!\n"; + DEBUG(errs() << "2addr: COMMUTING FAILED!\n"); return false; } - DOUT << "2addr: COMMUTED TO: " << *NewMI; + DEBUG(errs() << "2addr: COMMUTED TO: " << *NewMI); // If the instruction changed to commute it, update livevar. if (NewMI != MI) { if (LV) @@ -611,8 +631,8 @@ TwoAddressInstructionPass::ConvertInstTo3Addr(MachineBasicBlock::iterator &mi, unsigned RegB, unsigned Dist) { MachineInstr *NewMI = TII->convertToThreeAddress(mbbi, mi, LV); if (NewMI) { - DOUT << "2addr: CONVERTING 2-ADDR: " << *mi; - DOUT << "2addr: TO 3-ADDR: " << *NewMI; + DEBUG(errs() << "2addr: CONVERTING 2-ADDR: " << *mi); + DEBUG(errs() << "2addr: TO 3-ADDR: " << *NewMI); bool Sunk = false; if (NewMI->findRegisterUseOperand(RegB, false, TRI)) @@ -734,25 +754,174 @@ static bool isSafeToDelete(MachineInstr *MI, unsigned Reg, return true; } +/// canUpdateDeletedKills - Check if all the registers listed in Kills are +/// killed by instructions in MBB preceding the current instruction at +/// position Dist. If so, return true and record information about the +/// preceding kills in NewKills. 
+bool TwoAddressInstructionPass:: +canUpdateDeletedKills(SmallVector<unsigned, 4> &Kills, + SmallVector<NewKill, 4> &NewKills, + MachineBasicBlock *MBB, unsigned Dist) { + while (!Kills.empty()) { + unsigned Kill = Kills.back(); + Kills.pop_back(); + if (TargetRegisterInfo::isPhysicalRegister(Kill)) + return false; + + MachineInstr *LastKill = FindLastUseInMBB(Kill, MBB, Dist); + if (!LastKill) + return false; + + bool isModRef = LastKill->modifiesRegister(Kill); + NewKills.push_back(std::make_pair(std::make_pair(Kill, isModRef), + LastKill)); + } + return true; +} + +/// DeleteUnusedInstr - If an instruction with a tied register operand can +/// be safely deleted, just delete it. +bool +TwoAddressInstructionPass::DeleteUnusedInstr(MachineBasicBlock::iterator &mi, + MachineBasicBlock::iterator &nmi, + MachineFunction::iterator &mbbi, + unsigned regB, unsigned regBIdx, + unsigned Dist) { + // Check if the instruction has no side effects and if all its defs are dead. + SmallVector<unsigned, 4> Kills; + if (!isSafeToDelete(mi, regB, TII, Kills)) + return false; + + // If this instruction kills some virtual registers, we need to + // update the kill information. If it's not possible to do so, + // then bail out. + SmallVector<NewKill, 4> NewKills; + if (!canUpdateDeletedKills(Kills, NewKills, &*mbbi, Dist)) + return false; + + if (LV) { + while (!NewKills.empty()) { + MachineInstr *NewKill = NewKills.back().second; + unsigned Kill = NewKills.back().first.first; + bool isDead = NewKills.back().first.second; + NewKills.pop_back(); + if (LV->removeVirtualRegisterKilled(Kill, mi)) { + if (isDead) + LV->addVirtualRegisterDead(Kill, NewKill); + else + LV->addVirtualRegisterKilled(Kill, NewKill); + } + } + + // If regB was marked as a kill, update its Kills list. + if (mi->getOperand(regBIdx).isKill()) + LV->removeVirtualRegisterKilled(regB, mi); + } + + mbbi->erase(mi); // Nuke the old inst. 
+ mi = nmi; + return true; +} + +/// TryInstructionTransform - For the case where an instruction has a single +/// pair of tied register operands, attempt some transformations that may +/// either eliminate the tied operands or improve the opportunities for +/// coalescing away the register copy. Returns true if the tied operands +/// are eliminated altogether. +bool TwoAddressInstructionPass:: +TryInstructionTransform(MachineBasicBlock::iterator &mi, + MachineBasicBlock::iterator &nmi, + MachineFunction::iterator &mbbi, + unsigned SrcIdx, unsigned DstIdx, unsigned Dist) { + const TargetInstrDesc &TID = mi->getDesc(); + unsigned regA = mi->getOperand(DstIdx).getReg(); + unsigned regB = mi->getOperand(SrcIdx).getReg(); + + assert(TargetRegisterInfo::isVirtualRegister(regB) && + "cannot make instruction into two-address form"); + + // If regA is dead and the instruction can be deleted, just delete + // it so it doesn't clobber regB. + bool regBKilled = isKilled(*mi, regB, MRI, TII); + if (!regBKilled && mi->getOperand(DstIdx).isDead() && + DeleteUnusedInstr(mi, nmi, mbbi, regB, SrcIdx, Dist)) { + ++NumDeletes; + return true; // Done with this instruction. + } + + // Check if it is profitable to commute the operands. + unsigned SrcOp1, SrcOp2; + unsigned regC = 0; + unsigned regCIdx = ~0U; + bool TryCommute = false; + bool AggressiveCommute = false; + if (TID.isCommutable() && mi->getNumOperands() >= 3 && + TII->findCommutedOpIndices(mi, SrcOp1, SrcOp2)) { + if (SrcIdx == SrcOp1) + regCIdx = SrcOp2; + else if (SrcIdx == SrcOp2) + regCIdx = SrcOp1; + + if (regCIdx != ~0U) { + regC = mi->getOperand(regCIdx).getReg(); + if (!regBKilled && isKilled(*mi, regC, MRI, TII)) + // If C dies but B does not, swap the B and C operands. + // This makes the live ranges of A and C joinable. 
+ TryCommute = true; + else if (isProfitableToCommute(regB, regC, mi, mbbi, Dist)) { + TryCommute = true; + AggressiveCommute = true; + } + } + } + + // If it's profitable to commute, try to do so. + if (TryCommute && CommuteInstruction(mi, mbbi, regB, regC, Dist)) { + ++NumCommuted; + if (AggressiveCommute) + ++NumAggrCommuted; + return false; + } + + if (TID.isConvertibleTo3Addr()) { + // This instruction is potentially convertible to a true + // three-address instruction. Check if it is profitable. + if (!regBKilled || isProfitableToConv3Addr(regA)) { + // Try to convert it. + if (ConvertInstTo3Addr(mi, nmi, mbbi, regB, Dist)) { + ++NumConvertedTo3Addr; + return true; // Done with this instruction. + } + } + } + return false; +} + /// runOnMachineFunction - Reduce two-address instructions to two operands. /// bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { - DOUT << "Machine Function\n"; + DEBUG(errs() << "Machine Function\n"); const TargetMachine &TM = MF.getTarget(); MRI = &MF.getRegInfo(); TII = TM.getInstrInfo(); TRI = TM.getRegisterInfo(); LV = getAnalysisIfAvailable<LiveVariables>(); + AA = &getAnalysis<AliasAnalysis>(); bool MadeChange = false; - DOUT << "********** REWRITING TWO-ADDR INSTRS **********\n"; - DOUT << "********** Function: " << MF.getFunction()->getName() << '\n'; + DEBUG(errs() << "********** REWRITING TWO-ADDR INSTRS **********\n"); + DEBUG(errs() << "********** Function: " + << MF.getFunction()->getName() << '\n'); // ReMatRegs - Keep track of the registers whose def's are remat'ed. 
BitVector ReMatRegs; ReMatRegs.resize(MRI->getLastVirtReg()+1); + typedef DenseMap<unsigned, SmallVector<std::pair<unsigned, unsigned>, 4> > + TiedOperandMap; + TiedOperandMap TiedOperands(4); + SmallPtrSet<MachineInstr*, 8> Processed; for (MachineFunction::iterator mbbi = MF.begin(), mbbe = MF.end(); mbbi != mbbe; ++mbbi) { @@ -771,175 +940,102 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { ProcessCopy(&*mi, &*mbbi, Processed); + // First scan through all the tied register uses in this instruction + // and record a list of pairs of tied operands for each register. unsigned NumOps = (mi->getOpcode() == TargetInstrInfo::INLINEASM) ? mi->getNumOperands() : TID.getNumOperands(); - for (unsigned si = 0; si < NumOps; ++si) { - unsigned ti = 0; - if (!mi->isRegTiedToDefOperand(si, &ti)) + for (unsigned SrcIdx = 0; SrcIdx < NumOps; ++SrcIdx) { + unsigned DstIdx = 0; + if (!mi->isRegTiedToDefOperand(SrcIdx, &DstIdx)) continue; if (FirstTied) { + FirstTied = false; ++NumTwoAddressInstrs; - DOUT << '\t'; DEBUG(mi->print(*cerr.stream(), &TM)); + DEBUG(errs() << '\t' << *mi); } - FirstTied = false; + assert(mi->getOperand(SrcIdx).isReg() && + mi->getOperand(SrcIdx).getReg() && + mi->getOperand(SrcIdx).isUse() && + "two address instruction invalid"); - assert(mi->getOperand(si).isReg() && mi->getOperand(si).getReg() && - mi->getOperand(si).isUse() && "two address instruction invalid"); + unsigned regB = mi->getOperand(SrcIdx).getReg(); + TiedOperandMap::iterator OI = TiedOperands.find(regB); + if (OI == TiedOperands.end()) { + SmallVector<std::pair<unsigned, unsigned>, 4> TiedPair; + OI = TiedOperands.insert(std::make_pair(regB, TiedPair)).first; + } + OI->second.push_back(std::make_pair(SrcIdx, DstIdx)); + } - // If the two operands are the same we just remove the use - // and mark the def as def&use, otherwise we have to insert a copy. 
- if (mi->getOperand(ti).getReg() != mi->getOperand(si).getReg()) { - // Rewrite: - // a = b op c - // to: - // a = b - // a = a op c - unsigned regA = mi->getOperand(ti).getReg(); - unsigned regB = mi->getOperand(si).getReg(); + // Now iterate over the information collected above. + for (TiedOperandMap::iterator OI = TiedOperands.begin(), + OE = TiedOperands.end(); OI != OE; ++OI) { + SmallVector<std::pair<unsigned, unsigned>, 4> &TiedPairs = OI->second; + + // If the instruction has a single pair of tied operands, try some + // transformations that may either eliminate the tied operands or + // improve the opportunities for coalescing away the register copy. + if (TiedOperands.size() == 1 && TiedPairs.size() == 1) { + unsigned SrcIdx = TiedPairs[0].first; + unsigned DstIdx = TiedPairs[0].second; + + // If the registers are already equal, nothing needs to be done. + if (mi->getOperand(SrcIdx).getReg() == + mi->getOperand(DstIdx).getReg()) + break; // Done with this instruction. + + if (TryInstructionTransform(mi, nmi, mbbi, SrcIdx, DstIdx, Dist)) + break; // The tied operands have been eliminated. + } + + bool RemovedKillFlag = false; + bool AllUsesCopied = true; + unsigned LastCopiedReg = 0; + unsigned regB = OI->first; + for (unsigned tpi = 0, tpe = TiedPairs.size(); tpi != tpe; ++tpi) { + unsigned SrcIdx = TiedPairs[tpi].first; + unsigned DstIdx = TiedPairs[tpi].second; + unsigned regA = mi->getOperand(DstIdx).getReg(); + // Grab regB from the instruction because it may have changed if the + // instruction was commuted. + regB = mi->getOperand(SrcIdx).getReg(); + + if (regA == regB) { + // The register is tied to multiple destinations (or else we would + // not have continued this far), but this use of the register + // already matches the tied destination. Leave it. 
+ AllUsesCopied = false; + continue; + } + LastCopiedReg = regA; assert(TargetRegisterInfo::isVirtualRegister(regB) && - "cannot update physical register live information"); + "cannot make instruction into two-address form"); #ifndef NDEBUG - // First, verify that we don't have a use of a in the instruction (a = - // b + a for example) because our transformation will not work. This - // should never occur because we are in SSA form. + // First, verify that we don't have a use of "a" in the instruction + // (a = b + a for example) because our transformation will not + // work. This should never occur because we are in SSA form. for (unsigned i = 0; i != mi->getNumOperands(); ++i) - assert(i == ti || + assert(i == DstIdx || !mi->getOperand(i).isReg() || mi->getOperand(i).getReg() != regA); #endif - // If this instruction is not the killing user of B, see if we can - // rearrange the code to make it so. Making it the killing user will - // allow us to coalesce A and B together, eliminating the copy we are - // about to insert. - if (!isKilled(*mi, regB, MRI, TII)) { - // If regA is dead and the instruction can be deleted, just delete - // it so it doesn't clobber regB. - SmallVector<unsigned, 4> Kills; - if (mi->getOperand(ti).isDead() && - isSafeToDelete(mi, regB, TII, Kills)) { - SmallVector<std::pair<std::pair<unsigned, bool> - ,MachineInstr*>, 4> NewKills; - bool ReallySafe = true; - // If this instruction kills some virtual registers, we need - // update the kill information. If it's not possible to do so, - // then bail out. 
- while (!Kills.empty()) { - unsigned Kill = Kills.back(); - Kills.pop_back(); - if (TargetRegisterInfo::isPhysicalRegister(Kill)) { - ReallySafe = false; - break; - } - MachineInstr *LastKill = FindLastUseInMBB(Kill, &*mbbi, Dist); - if (LastKill) { - bool isModRef = LastKill->modifiesRegister(Kill); - NewKills.push_back(std::make_pair(std::make_pair(Kill,isModRef), - LastKill)); - } else { - ReallySafe = false; - break; - } - } - - if (ReallySafe) { - if (LV) { - while (!NewKills.empty()) { - MachineInstr *NewKill = NewKills.back().second; - unsigned Kill = NewKills.back().first.first; - bool isDead = NewKills.back().first.second; - NewKills.pop_back(); - if (LV->removeVirtualRegisterKilled(Kill, mi)) { - if (isDead) - LV->addVirtualRegisterDead(Kill, NewKill); - else - LV->addVirtualRegisterKilled(Kill, NewKill); - } - } - } - - // We're really going to nuke the old inst. If regB was marked - // as a kill we need to update its Kills list. - if (mi->getOperand(si).isKill()) - LV->removeVirtualRegisterKilled(regB, mi); - - mbbi->erase(mi); // Nuke the old inst. - mi = nmi; - ++NumDeletes; - break; // Done with this instruction. - } - } - - // If this instruction is commutative, check to see if C dies. If - // so, swap the B and C operands. This makes the live ranges of A - // and C joinable. - // FIXME: This code also works for A := B op C instructions. - if (TID.isCommutable() && mi->getNumOperands() >= 3) { - assert(mi->getOperand(3-si).isReg() && - "Not a proper commutative instruction!"); - unsigned regC = mi->getOperand(3-si).getReg(); - if (isKilled(*mi, regC, MRI, TII)) { - if (CommuteInstruction(mi, mbbi, regB, regC, Dist)) { - ++NumCommuted; - regB = regC; - goto InstructionRearranged; - } - } - } - - // If this instruction is potentially convertible to a true - // three-address instruction, - if (TID.isConvertibleTo3Addr()) { - // FIXME: This assumes there are no more operands which are tied - // to another register. 
-#ifndef NDEBUG - for (unsigned i = si + 1, e = TID.getNumOperands(); i < e; ++i) - assert(TID.getOperandConstraint(i, TOI::TIED_TO) == -1); -#endif - - if (ConvertInstTo3Addr(mi, nmi, mbbi, regB, Dist)) { - ++NumConvertedTo3Addr; - break; // Done with this instruction. - } - } - } - - // If it's profitable to commute the instruction, do so. - if (TID.isCommutable() && mi->getNumOperands() >= 3) { - unsigned regC = mi->getOperand(3-si).getReg(); - if (isProfitableToCommute(regB, regC, mi, mbbi, Dist)) - if (CommuteInstruction(mi, mbbi, regB, regC, Dist)) { - ++NumAggrCommuted; - ++NumCommuted; - regB = regC; - goto InstructionRearranged; - } - } - - // If it's profitable to convert the 2-address instruction to a - // 3-address one, do so. - if (TID.isConvertibleTo3Addr() && isProfitableToConv3Addr(regA)) { - if (ConvertInstTo3Addr(mi, nmi, mbbi, regB, Dist)) { - ++NumConvertedTo3Addr; - break; // Done with this instruction. - } - } - - InstructionRearranged: - const TargetRegisterClass* rc = MRI->getRegClass(regB); + // Emit a copy or rematerialize the definition. + const TargetRegisterClass *rc = MRI->getRegClass(regB); MachineInstr *DefMI = MRI->getVRegDef(regB); // If it's safe and profitable, remat the definition instead of // copying it. if (DefMI && DefMI->getDesc().isAsCheapAsAMove() && - DefMI->isSafeToReMat(TII, regB) && + DefMI->isSafeToReMat(TII, regB, AA) && isProfitableToReMat(regB, rc, mi, DefMI, mbbi, Dist)){ - DEBUG(cerr << "2addr: REMATTING : " << *DefMI << "\n"); - TII->reMaterialize(*mbbi, mi, regA, DefMI); + DEBUG(errs() << "2addr: REMATTING : " << *DefMI << "\n"); + unsigned regASubIdx = mi->getOperand(DstIdx).getSubReg(); + TII->reMaterialize(*mbbi, mi, regA, regASubIdx, DefMI); ReMatRegs.set(regB); ++NumReMats; } else { @@ -953,32 +1049,57 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { DistanceMap.insert(std::make_pair(prevMI, Dist)); DistanceMap[mi] = ++Dist; - // Update live variables for regB. 
- if (LV) { - if (LV->removeVirtualRegisterKilled(regB, mi)) - LV->addVirtualRegisterKilled(regB, prevMI); + DEBUG(errs() << "\t\tprepend:\t" << *prevMI); - if (LV->removeVirtualRegisterDead(regB, mi)) - LV->addVirtualRegisterDead(regB, prevMI); + MachineOperand &MO = mi->getOperand(SrcIdx); + assert(MO.isReg() && MO.getReg() == regB && MO.isUse() && + "inconsistent operand info for 2-reg pass"); + if (MO.isKill()) { + MO.setIsKill(false); + RemovedKillFlag = true; } + MO.setReg(regA); + } - DOUT << "\t\tprepend:\t"; DEBUG(prevMI->print(*cerr.stream(), &TM)); - - // Replace all occurences of regB with regA. + if (AllUsesCopied) { + // Replace other (un-tied) uses of regB with LastCopiedReg. for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) { - if (mi->getOperand(i).isReg() && - mi->getOperand(i).getReg() == regB) - mi->getOperand(i).setReg(regA); + MachineOperand &MO = mi->getOperand(i); + if (MO.isReg() && MO.getReg() == regB && MO.isUse()) { + if (MO.isKill()) { + MO.setIsKill(false); + RemovedKillFlag = true; + } + MO.setReg(LastCopiedReg); + } } - } - assert(mi->getOperand(ti).isDef() && mi->getOperand(si).isUse()); - mi->getOperand(ti).setReg(mi->getOperand(si).getReg()); + // Update live variables for regB. + if (RemovedKillFlag && LV && LV->getVarInfo(regB).removeKill(mi)) + LV->addVirtualRegisterKilled(regB, prior(mi)); + + } else if (RemovedKillFlag) { + // Some tied uses of regB matched their destination registers, so + // regB is still used in this instruction, but a kill flag was + // removed from a different tied use of regB, so now we need to add + // a kill flag to one of the remaining uses of regB. 
+ for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) { + MachineOperand &MO = mi->getOperand(i); + if (MO.isReg() && MO.getReg() == regB && MO.isUse()) { + MO.setIsKill(true); + break; + } + } + } + MadeChange = true; - DOUT << "\t\trewrite to:\t"; DEBUG(mi->print(*cerr.stream(), &TM)); + DEBUG(errs() << "\t\trewrite to:\t" << *mi); } + // Clear TiedOperands here instead of at the top of the loop + // since most instructions do not have tied operands. + TiedOperands.clear(); mi = nmi; } } diff --git a/lib/CodeGen/UnreachableBlockElim.cpp b/lib/CodeGen/UnreachableBlockElim.cpp index c3b213c..e7c3412 100644 --- a/lib/CodeGen/UnreachableBlockElim.cpp +++ b/lib/CodeGen/UnreachableBlockElim.cpp @@ -26,8 +26,11 @@ #include "llvm/Function.h" #include "llvm/Pass.h" #include "llvm/Type.h" +#include "llvm/Analysis/ProfileInfo.h" +#include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/CFG.h" #include "llvm/Support/Compiler.h" @@ -42,6 +45,10 @@ namespace { public: static char ID; // Pass identification, replacement for typeid UnreachableBlockElim() : FunctionPass(&ID) {} + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addPreserved<ProfileInfo>(); + } }; } char UnreachableBlockElim::ID = 0; @@ -77,8 +84,11 @@ bool UnreachableBlockElim::runOnFunction(Function &F) { } // Actually remove the blocks now. 
- for (unsigned i = 0, e = DeadBlocks.size(); i != e; ++i) + ProfileInfo *PI = getAnalysisIfAvailable<ProfileInfo>(); + for (unsigned i = 0, e = DeadBlocks.size(); i != e; ++i) { + if (PI) PI->removeBlock(DeadBlocks[i]); DeadBlocks[i]->eraseFromParent(); + } return DeadBlocks.size(); } @@ -88,6 +98,7 @@ namespace { class VISIBILITY_HIDDEN UnreachableMachineBlockElim : public MachineFunctionPass { virtual bool runOnMachineFunction(MachineFunction &F); + virtual void getAnalysisUsage(AnalysisUsage &AU) const; MachineModuleInfo *MMI; public: static char ID; // Pass identification, replacement for typeid @@ -102,10 +113,18 @@ Y("unreachable-mbb-elimination", const PassInfo *const llvm::UnreachableMachineBlockElimID = &Y; +void UnreachableMachineBlockElim::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addPreserved<MachineLoopInfo>(); + AU.addPreserved<MachineDominatorTree>(); + MachineFunctionPass::getAnalysisUsage(AU); +} + bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) { SmallPtrSet<MachineBasicBlock*, 8> Reachable; MMI = getAnalysisIfAvailable<MachineModuleInfo>(); + MachineDominatorTree *MDT = getAnalysisIfAvailable<MachineDominatorTree>(); + MachineLoopInfo *MLI = getAnalysisIfAvailable<MachineLoopInfo>(); // Mark all reachable blocks. for (df_ext_iterator<MachineFunction*, SmallPtrSet<MachineBasicBlock*, 8> > @@ -123,6 +142,10 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) { if (!Reachable.count(BB)) { DeadBlocks.push_back(BB); + // Update dominator and loop info. 
+ if (MLI) MLI->removeBlock(BB); + if (MDT && MDT->getNode(BB)) MDT->eraseNode(BB); + while (BB->succ_begin() != BB->succ_end()) { MachineBasicBlock* succ = *BB->succ_begin(); diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp index 4d3417f..c78f35b 100644 --- a/lib/CodeGen/VirtRegMap.cpp +++ b/lib/CodeGen/VirtRegMap.cpp @@ -30,6 +30,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DepthFirstIterator.h" @@ -258,7 +259,7 @@ bool VirtRegMap::FindUnusedRegisters(LiveIntervals* LIs) { return AnyUnused; } -void VirtRegMap::print(std::ostream &OS, const Module* M) const { +void VirtRegMap::print(raw_ostream &OS, const Module* M) const { const TargetRegisterInfo* TRI = MF->getTarget().getRegisterInfo(); OS << "********** REGISTER MAP **********\n"; @@ -277,5 +278,5 @@ void VirtRegMap::print(std::ostream &OS, const Module* M) const { } void VirtRegMap::dump() const { - print(cerr); + print(errs()); } diff --git a/lib/CodeGen/VirtRegMap.h b/lib/CodeGen/VirtRegMap.h index fe767b7..bdc2d1f 100644 --- a/lib/CodeGen/VirtRegMap.h +++ b/lib/CodeGen/VirtRegMap.h @@ -18,13 +18,13 @@ #define LLVM_CODEGEN_VIRTREGMAP_H #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/LiveInterval.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/IndexedMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/Support/Streams.h" #include <map> namespace llvm { @@ -34,6 +34,7 @@ namespace llvm { class MachineRegisterInfo; class TargetInstrInfo; class TargetRegisterInfo; + class raw_ostream; class VirtRegMap : public MachineFunctionPass { public: @@ -79,7 +80,7 @@ namespace llvm { /// Virt2SplitKillMap - This is splitted virtual register to its last use /// (kill) index 
mapping. - IndexedMap<unsigned> Virt2SplitKillMap; + IndexedMap<LiveIndex> Virt2SplitKillMap; /// ReMatMap - This is virtual register to re-materialized instruction /// mapping. Each virtual register whose definition is going to be @@ -141,7 +142,7 @@ namespace llvm { VirtRegMap() : MachineFunctionPass(&ID), Virt2PhysMap(NO_PHYS_REG), Virt2StackSlotMap(NO_STACK_SLOT), Virt2ReMatIdMap(NO_STACK_SLOT), Virt2SplitMap(0), - Virt2SplitKillMap(0), ReMatMap(NULL), + Virt2SplitKillMap(LiveIndex()), ReMatMap(NULL), ReMatId(MAX_STACK_SLOT+1), LowSpillSlot(NO_STACK_SLOT), HighSpillSlot(NO_STACK_SLOT) { } virtual bool runOnMachineFunction(MachineFunction &MF); @@ -265,17 +266,17 @@ namespace llvm { } /// @brief record the last use (kill) of a split virtual register. - void addKillPoint(unsigned virtReg, unsigned index) { + void addKillPoint(unsigned virtReg, LiveIndex index) { Virt2SplitKillMap[virtReg] = index; } - unsigned getKillPoint(unsigned virtReg) const { + LiveIndex getKillPoint(unsigned virtReg) const { return Virt2SplitKillMap[virtReg]; } /// @brief remove the last use (kill) of a split virtual register. void removeKillPoint(unsigned virtReg) { - Virt2SplitKillMap[virtReg] = 0; + Virt2SplitKillMap[virtReg] = LiveIndex(); } /// @brief returns true if the specified MachineInstr is a spill point. 
@@ -481,16 +482,11 @@ namespace llvm { return 0; } - void print(std::ostream &OS, const Module* M = 0) const; - void print(std::ostream *OS) const { if (OS) print(*OS); } + void print(raw_ostream &OS, const Module* M = 0) const; void dump() const; }; - inline std::ostream *operator<<(std::ostream *OS, const VirtRegMap &VRM) { - VRM.print(OS); - return OS; - } - inline std::ostream &operator<<(std::ostream &OS, const VirtRegMap &VRM) { + inline raw_ostream &operator<<(raw_ostream &OS, const VirtRegMap &VRM) { VRM.print(OS); return OS; } diff --git a/lib/CodeGen/VirtRegRewriter.cpp b/lib/CodeGen/VirtRegRewriter.cpp index be0b016..401bcb6 100644 --- a/lib/CodeGen/VirtRegRewriter.cpp +++ b/lib/CodeGen/VirtRegRewriter.cpp @@ -9,10 +9,19 @@ #define DEBUG_TYPE "virtregrewriter" #include "VirtRegRewriter.h" +#include "llvm/Function.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLowering.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" #include <algorithm> using namespace llvm; @@ -45,10 +54,15 @@ RewriterOpt("rewriter", clEnumValEnd), cl::init(local)); +static cl::opt<bool> +ScheduleSpills("schedule-spills", + cl::desc("Schedule spill code"), + cl::init(false)); + VirtRegRewriter::~VirtRegRewriter() {} +namespace { - /// This class is intended for use with the new spilling framework only. It /// rewrites vreg def/uses to use the assigned preg, but does not insert any /// spill code. 
@@ -56,8 +70,13 @@ struct VISIBILITY_HIDDEN TrivialRewriter : public VirtRegRewriter { bool runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM, LiveIntervals* LIs) { - DOUT << "********** REWRITE MACHINE CODE **********\n"; - DOUT << "********** Function: " << MF.getFunction()->getName() << '\n'; + DEBUG(errs() << "********** REWRITE MACHINE CODE **********\n"); + DEBUG(errs() << "********** Function: " + << MF.getFunction()->getName() << '\n'); + DEBUG(errs() << "**** Machine Instrs" + << "(NOTE! Does not include spills and reloads!) ****\n"); + DEBUG(MF.dump()); + MachineRegisterInfo *mri = &MF.getRegInfo(); bool changed = false; @@ -79,14 +98,22 @@ struct VISIBILITY_HIDDEN TrivialRewriter : public VirtRegRewriter { } } } + + + DEBUG(errs() << "**** Post Machine Instrs ****\n"); + DEBUG(MF.dump()); return changed; } }; +} + // ************************************************************************ // +namespace { + /// AvailableSpills - As the local rewriter is scanning and rewriting an MBB /// from top down, keep track of which spill slots or remat are available in /// each register. 
@@ -154,10 +181,11 @@ public: (unsigned)CanClobber; if (SlotOrReMat > VirtRegMap::MAX_STACK_SLOT) - DOUT << "Remembering RM#" << SlotOrReMat-VirtRegMap::MAX_STACK_SLOT-1; + DEBUG(errs() << "Remembering RM#" + << SlotOrReMat-VirtRegMap::MAX_STACK_SLOT-1); else - DOUT << "Remembering SS#" << SlotOrReMat; - DOUT << " in physreg " << TRI->getName(Reg) << "\n"; + DEBUG(errs() << "Remembering SS#" << SlotOrReMat); + DEBUG(errs() << " in physreg " << TRI->getName(Reg) << "\n"); } /// canClobberPhysRegForSS - Return true if the spiller is allowed to change @@ -209,8 +237,82 @@ public: std::vector<MachineOperand*> &KillOps); }; +} + // ************************************************************************ // +// Given a location where a reload of a spilled register or a remat of +// a constant is to be inserted, attempt to find a safe location to +// insert the load at an earlier point in the basic-block, to hide +// latency of the load and to avoid address-generation interlock +// issues. +static MachineBasicBlock::iterator +ComputeReloadLoc(MachineBasicBlock::iterator const InsertLoc, + MachineBasicBlock::iterator const Begin, + unsigned PhysReg, + const TargetRegisterInfo *TRI, + bool DoReMat, + int SSorRMId, + const TargetInstrInfo *TII, + const MachineFunction &MF) +{ + if (!ScheduleSpills) + return InsertLoc; + + // Spill backscheduling is of primary interest to addresses, so + // don't do anything if the register isn't in the register class + // used for pointers. + + const TargetLowering *TL = MF.getTarget().getTargetLowering(); + + if (!TL->isTypeLegal(TL->getPointerTy())) + // Believe it or not, this is true on PIC16. + return InsertLoc; + + const TargetRegisterClass *ptrRegClass = + TL->getRegClassFor(TL->getPointerTy()); + if (!ptrRegClass->contains(PhysReg)) + return InsertLoc; + + // Scan upwards through the preceding instructions. 
If an instruction doesn't + // reference the stack slot or the register we're loading, we can + // backschedule the reload up past it. + MachineBasicBlock::iterator NewInsertLoc = InsertLoc; + while (NewInsertLoc != Begin) { + MachineBasicBlock::iterator Prev = prior(NewInsertLoc); + for (unsigned i = 0; i < Prev->getNumOperands(); ++i) { + MachineOperand &Op = Prev->getOperand(i); + if (!DoReMat && Op.isFI() && Op.getIndex() == SSorRMId) + goto stop; + } + if (Prev->findRegisterUseOperandIdx(PhysReg) != -1 || + Prev->findRegisterDefOperand(PhysReg)) + goto stop; + for (const unsigned *Alias = TRI->getAliasSet(PhysReg); *Alias; ++Alias) + if (Prev->findRegisterUseOperandIdx(*Alias) != -1 || + Prev->findRegisterDefOperand(*Alias)) + goto stop; + NewInsertLoc = Prev; + } +stop:; + + // If we made it to the beginning of the block, turn around and move back + // down just past any existing reloads. They're likely to be reloads/remats + // for instructions earlier than what our current reload/remat is for, so + // they should be scheduled earlier. + if (NewInsertLoc == Begin) { + int FrameIdx; + while (InsertLoc != NewInsertLoc && + (TII->isLoadFromStackSlot(NewInsertLoc, FrameIdx) || + TII->isTriviallyReMaterializable(NewInsertLoc))) + ++NewInsertLoc; + } + + return NewInsertLoc; +} + +namespace { + // ReusedOp - For each reused operand, we keep track of a bit of information, // in case we need to rollback upon processing a new operand. See comments // below. @@ -276,7 +378,8 @@ public: /// GetRegForReload - We are about to emit a reload into PhysReg. If there /// is some other operand that is using the specified register, either pick /// a new register to use, or evict the previous reload and use this reg. 
- unsigned GetRegForReload(unsigned PhysReg, MachineInstr *MI, + unsigned GetRegForReload(const TargetRegisterClass *RC, unsigned PhysReg, + MachineFunction &MF, MachineInstr *MI, AvailableSpills &Spills, std::vector<MachineInstr*> &MaybeDeadStores, SmallSet<unsigned, 8> &Rejected, @@ -295,18 +398,21 @@ public: /// sees r1 is taken by t2, tries t2's reload register r0 /// sees r0 is taken by t3, tries t3's reload register r1 /// sees r1 is taken by t2, tries t2's reload register r0 ... - unsigned GetRegForReload(unsigned PhysReg, MachineInstr *MI, + unsigned GetRegForReload(unsigned VirtReg, unsigned PhysReg, MachineInstr *MI, AvailableSpills &Spills, std::vector<MachineInstr*> &MaybeDeadStores, BitVector &RegKills, std::vector<MachineOperand*> &KillOps, VirtRegMap &VRM) { SmallSet<unsigned, 8> Rejected; - return GetRegForReload(PhysReg, MI, Spills, MaybeDeadStores, Rejected, - RegKills, KillOps, VRM); + MachineFunction &MF = *MI->getParent()->getParent(); + const TargetRegisterClass* RC = MF.getRegInfo().getRegClass(VirtReg); + return GetRegForReload(RC, PhysReg, MF, MI, Spills, MaybeDeadStores, + Rejected, RegKills, KillOps, VRM); } }; +} // ****************** // // Utility Functions // @@ -489,7 +595,14 @@ static void ReMaterialize(MachineBasicBlock &MBB, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI, VirtRegMap &VRM) { - TII->reMaterialize(MBB, MII, DestReg, VRM.getReMaterializedMI(Reg)); + MachineInstr *ReMatDefMI = VRM.getReMaterializedMI(Reg); +#ifndef NDEBUG + const TargetInstrDesc &TID = ReMatDefMI->getDesc(); + assert(TID.getNumDefs() == 1 && + "Don't know how to remat instructions that define > 1 values!"); +#endif + TII->reMaterialize(MBB, MII, DestReg, + ReMatDefMI->getOperand(0).getSubReg(), ReMatDefMI); MachineInstr *NewMI = prior(MII); for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) { MachineOperand &MO = NewMI->getOperand(i); @@ -538,8 +651,8 @@ void AvailableSpills::disallowClobberPhysRegOnly(unsigned PhysReg) { 
assert((SpillSlotsOrReMatsAvailable[SlotOrReMat] >> 1) == PhysReg && "Bidirectional map mismatch!"); SpillSlotsOrReMatsAvailable[SlotOrReMat] &= ~1; - DOUT << "PhysReg " << TRI->getName(PhysReg) - << " copied, it is available for use but can no longer be modified\n"; + DEBUG(errs() << "PhysReg " << TRI->getName(PhysReg) + << " copied, it is available for use but can no longer be modified\n"); } } @@ -563,12 +676,12 @@ void AvailableSpills::ClobberPhysRegOnly(unsigned PhysReg) { assert((SpillSlotsOrReMatsAvailable[SlotOrReMat] >> 1) == PhysReg && "Bidirectional map mismatch!"); SpillSlotsOrReMatsAvailable.erase(SlotOrReMat); - DOUT << "PhysReg " << TRI->getName(PhysReg) - << " clobbered, invalidating "; + DEBUG(errs() << "PhysReg " << TRI->getName(PhysReg) + << " clobbered, invalidating "); if (SlotOrReMat > VirtRegMap::MAX_STACK_SLOT) - DOUT << "RM#" << SlotOrReMat-VirtRegMap::MAX_STACK_SLOT-1 << "\n"; + DEBUG(errs() << "RM#" << SlotOrReMat-VirtRegMap::MAX_STACK_SLOT-1 <<"\n"); else - DOUT << "SS#" << SlotOrReMat << "\n"; + DEBUG(errs() << "SS#" << SlotOrReMat << "\n"); } } @@ -650,15 +763,17 @@ void AvailableSpills::ModifyStackSlotOrReMat(int SlotOrReMat) { /// GetRegForReload - We are about to emit a reload into PhysReg. If there /// is some other operand that is using the specified register, either pick /// a new register to use, or evict the previous reload and use this reg. 
-unsigned ReuseInfo::GetRegForReload(unsigned PhysReg, MachineInstr *MI, - AvailableSpills &Spills, +unsigned ReuseInfo::GetRegForReload(const TargetRegisterClass *RC, + unsigned PhysReg, + MachineFunction &MF, + MachineInstr *MI, AvailableSpills &Spills, std::vector<MachineInstr*> &MaybeDeadStores, SmallSet<unsigned, 8> &Rejected, BitVector &RegKills, std::vector<MachineOperand*> &KillOps, VirtRegMap &VRM) { - const TargetInstrInfo* TII = MI->getParent()->getParent()->getTarget() - .getInstrInfo(); + const TargetInstrInfo* TII = MF.getTarget().getInstrInfo(); + const TargetRegisterInfo *TRI = Spills.getRegInfo(); if (Reuses.empty()) return PhysReg; // This is most often empty. @@ -670,19 +785,19 @@ unsigned ReuseInfo::GetRegForReload(unsigned PhysReg, MachineInstr *MI, // considered and subsequently rejected because it has also been reused // by another operand. if (Op.PhysRegReused == PhysReg && - Rejected.count(Op.AssignedPhysReg) == 0) { + Rejected.count(Op.AssignedPhysReg) == 0 && + RC->contains(Op.AssignedPhysReg)) { // Yup, use the reload register that we didn't use before. unsigned NewReg = Op.AssignedPhysReg; Rejected.insert(PhysReg); - return GetRegForReload(NewReg, MI, Spills, MaybeDeadStores, Rejected, + return GetRegForReload(RC, NewReg, MF, MI, Spills, MaybeDeadStores, Rejected, RegKills, KillOps, VRM); } else { // Otherwise, we might also have a problem if a previously reused - // value aliases the new register. If so, codegen the previous reload + // value aliases the new register. If so, codegen the previous reload // and use this one. unsigned PRRU = Op.PhysRegReused; - const TargetRegisterInfo *TRI = Spills.getRegInfo(); - if (TRI->areAliases(PRRU, PhysReg)) { + if (TRI->regsOverlap(PRRU, PhysReg)) { // Okay, we found out that an alias of a reused register // was used. This isn't good because it means we have // to undo a previous reuse. 
@@ -695,21 +810,45 @@ unsigned ReuseInfo::GetRegForReload(unsigned PhysReg, MachineInstr *MI, ReusedOp NewOp = Op; Reuses.erase(Reuses.begin()+ro); + // MI may be using only a sub-register of PhysRegUsed. + unsigned RealPhysRegUsed = MI->getOperand(NewOp.Operand).getReg(); + unsigned SubIdx = 0; + assert(TargetRegisterInfo::isPhysicalRegister(RealPhysRegUsed) && + "A reuse cannot be a virtual register"); + if (PRRU != RealPhysRegUsed) { + // What was the sub-register index? + unsigned SubReg; + for (SubIdx = 1; (SubReg = TRI->getSubReg(PRRU, SubIdx)); SubIdx++) + if (SubReg == RealPhysRegUsed) + break; + assert(SubReg == RealPhysRegUsed && + "Operand physreg is not a sub-register of PhysRegUsed"); + } + // Ok, we're going to try to reload the assigned physreg into the // slot that we were supposed to in the first place. However, that // register could hold a reuse. Check to see if it conflicts or // would prefer us to use a different register. - unsigned NewPhysReg = GetRegForReload(NewOp.AssignedPhysReg, - MI, Spills, MaybeDeadStores, - Rejected, RegKills, KillOps, VRM); - - MachineBasicBlock::iterator MII = MI; - if (NewOp.StackSlotOrReMat > VirtRegMap::MAX_STACK_SLOT) { - ReMaterialize(*MBB, MII, NewPhysReg, NewOp.VirtReg, TII, TRI,VRM); - } else { - TII->loadRegFromStackSlot(*MBB, MII, NewPhysReg, + unsigned NewPhysReg = GetRegForReload(RC, NewOp.AssignedPhysReg, + MF, MI, Spills, MaybeDeadStores, + Rejected, RegKills, KillOps, VRM); + + bool DoReMat = NewOp.StackSlotOrReMat > VirtRegMap::MAX_STACK_SLOT; + int SSorRMId = DoReMat + ? VRM.getReMatId(NewOp.VirtReg) : NewOp.StackSlotOrReMat; + + // Back-schedule reloads and remats. 
+ MachineBasicBlock::iterator InsertLoc = + ComputeReloadLoc(MI, MBB->begin(), PhysReg, TRI, + DoReMat, SSorRMId, TII, MF); + + if (DoReMat) { + ReMaterialize(*MBB, InsertLoc, NewPhysReg, NewOp.VirtReg, TII, + TRI, VRM); + } else { + TII->loadRegFromStackSlot(*MBB, InsertLoc, NewPhysReg, NewOp.StackSlotOrReMat, AliasRC); - MachineInstr *LoadMI = prior(MII); + MachineInstr *LoadMI = prior(InsertLoc); VRM.addSpillSlotUse(NewOp.StackSlotOrReMat, LoadMI); // Any stores to this stack slot are not dead anymore. MaybeDeadStores[NewOp.StackSlotOrReMat] = NULL; @@ -718,17 +857,15 @@ unsigned ReuseInfo::GetRegForReload(unsigned PhysReg, MachineInstr *MI, Spills.ClobberPhysReg(NewPhysReg); Spills.ClobberPhysReg(NewOp.PhysRegReused); - unsigned SubIdx = MI->getOperand(NewOp.Operand).getSubReg(); unsigned RReg = SubIdx ? TRI->getSubReg(NewPhysReg, SubIdx) : NewPhysReg; MI->getOperand(NewOp.Operand).setReg(RReg); MI->getOperand(NewOp.Operand).setSubReg(0); Spills.addAvailable(NewOp.StackSlotOrReMat, NewPhysReg); - --MII; - UpdateKills(*MII, TRI, RegKills, KillOps); - DOUT << '\t' << *MII; + UpdateKills(*prior(InsertLoc), TRI, RegKills, KillOps); + DEBUG(errs() << '\t' << *prior(InsertLoc)); - DOUT << "Reuse undone!\n"; + DEBUG(errs() << "Reuse undone!\n"); --NumReused; // Finally, PhysReg is now available, go ahead and use it. @@ -856,6 +993,8 @@ namespace { // Local Spiller Implementation // // ***************************** // +namespace { + class VISIBILITY_HIDDEN LocalRewriter : public VirtRegRewriter { MachineRegisterInfo *RegInfo; const TargetRegisterInfo *TRI; @@ -870,10 +1009,10 @@ public: TRI = MF.getTarget().getRegisterInfo(); TII = MF.getTarget().getInstrInfo(); AllocatableRegs = TRI->getAllocatableSet(MF); - DOUT << "\n**** Local spiller rewriting function '" - << MF.getFunction()->getName() << "':\n"; - DOUT << "**** Machine Instrs (NOTE! 
Does not include spills and reloads!)" - " ****\n"; + DEBUG(errs() << "\n**** Local spiller rewriting function '" + << MF.getFunction()->getName() << "':\n"); + DEBUG(errs() << "**** Machine Instrs (NOTE! Does not include spills and" + " reloads!) ****\n"); DEBUG(MF.dump()); // Spills - Keep track of which spilled values are available in physregs @@ -924,7 +1063,7 @@ public: Spills.clear(); } - DOUT << "**** Post Machine Instrs ****\n"; + DEBUG(errs() << "**** Post Machine Instrs ****\n"); DEBUG(MF.dump()); // Mark unused spill slots. @@ -988,6 +1127,9 @@ private: if (!FoldsStackSlotModRef(*NextMII, SS, PhysReg, TII, TRI, VRM)) return false; + // Back-schedule reloads and remats. + ComputeReloadLoc(MII, MBB.begin(), PhysReg, TRI, false, SS, TII, MF); + // Load from SS to the spare physical register. TII->loadRegFromStackSlot(MBB, MII, PhysReg, SS, RC); // This invalidates Phys. @@ -999,7 +1141,7 @@ private: // Unfold current MI. SmallVector<MachineInstr*, 4> NewMIs; if (!TII->unfoldMemoryOperand(MF, &MI, VirtReg, false, false, NewMIs)) - assert(0 && "Unable unfold the load / store folding instruction!"); + llvm_unreachable("Unable unfold the load / store folding instruction!"); assert(NewMIs.size() == 1); AssignPhysToVirtReg(NewMIs[0], VirtReg, PhysReg); VRM.transferRestorePts(&MI, NewMIs[0]); @@ -1015,7 +1157,7 @@ private: NextMII = next(NextMII); NewMIs.clear(); if (!TII->unfoldMemoryOperand(MF, &NextMI, VirtReg, false, false, NewMIs)) - assert(0 && "Unable unfold the load / store folding instruction!"); + llvm_unreachable("Unable unfold the load / store folding instruction!"); assert(NewMIs.size() == 1); AssignPhysToVirtReg(NewMIs[0], VirtReg, PhysReg); VRM.transferRestorePts(&NextMI, NewMIs[0]); @@ -1157,6 +1299,32 @@ private: return false; } + /// CommuteChangesDestination - We are looking for r0 = op r1, r2 and + /// where SrcReg is r1 and it is tied to r0. Return true if after + /// commuting this instruction it will be r0 = op r2, r1. 
+ static bool CommuteChangesDestination(MachineInstr *DefMI, + const TargetInstrDesc &TID, + unsigned SrcReg, + const TargetInstrInfo *TII, + unsigned &DstIdx) { + if (TID.getNumDefs() != 1 && TID.getNumOperands() != 3) + return false; + if (!DefMI->getOperand(1).isReg() || + DefMI->getOperand(1).getReg() != SrcReg) + return false; + unsigned DefIdx; + if (!DefMI->isRegTiedToDefOperand(1, &DefIdx) || DefIdx != 0) + return false; + unsigned SrcIdx1, SrcIdx2; + if (!TII->findCommutedOpIndices(DefMI, SrcIdx1, SrcIdx2)) + return false; + if (SrcIdx1 == 1 && SrcIdx2 == 2) { + DstIdx = 2; + return true; + } + return false; + } + /// CommuteToFoldReload - /// Look for /// r1 = load fi#1 @@ -1185,7 +1353,7 @@ private: unsigned NewDstIdx; if (DefMII != MBB.begin() && TID.isCommutable() && - TII->CommuteChangesDestination(DefMI, NewDstIdx)) { + CommuteChangesDestination(DefMI, TID, SrcReg, TII, NewDstIdx)) { MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx); unsigned NewReg = NewDstMO.getReg(); if (!NewDstMO.isKill() || TRI->regsOverlap(NewReg, SrcReg)) @@ -1266,11 +1434,11 @@ private: TII->storeRegToStackSlot(MBB, next(MII), PhysReg, true, StackSlot, RC); MachineInstr *StoreMI = next(MII); VRM.addSpillSlotUse(StackSlot, StoreMI); - DOUT << "Store:\t" << *StoreMI; + DEBUG(errs() << "Store:\t" << *StoreMI); // If there is a dead store to this stack slot, nuke it now. if (LastStore) { - DOUT << "Removed dead store:\t" << *LastStore; + DEBUG(errs() << "Removed dead store:\t" << *LastStore); ++NumDSE; SmallVector<unsigned, 2> KillRegs; InvalidateKills(*LastStore, TRI, RegKills, KillOps, &KillRegs); @@ -1310,6 +1478,29 @@ private: ++NumStores; } + /// isSafeToDelete - Return true if this instruction doesn't produce any side + /// effect and all of its defs are dead. 
+ static bool isSafeToDelete(MachineInstr &MI) { + const TargetInstrDesc &TID = MI.getDesc(); + if (TID.mayLoad() || TID.mayStore() || TID.isCall() || TID.isTerminator() || + TID.isCall() || TID.isBarrier() || TID.isReturn() || + TID.hasUnmodeledSideEffects()) + return false; + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI.getOperand(i); + if (!MO.isReg() || !MO.getReg()) + continue; + if (MO.isDef() && !MO.isDead()) + return false; + if (MO.isUse() && MO.isKill()) + // FIXME: We can't remove kill markers or else the scavenger will assert. + // An alternative is to add a ADD pseudo instruction to replace kill + // markers. + return false; + } + return true; + } + /// TransferDeadness - A identity copy definition is dead and it's being /// removed. Find the last def or use and mark it as dead / kill. void TransferDeadness(MachineBasicBlock *MBB, unsigned CurDist, @@ -1351,9 +1542,7 @@ private: if (LastUD->isDef()) { // If the instruction has no side effect, delete it and propagate // backward further. Otherwise, mark is dead and we are done. 
- const TargetInstrDesc &TID = LastUDMI->getDesc(); - if (TID.mayStore() || TID.isCall() || TID.isTerminator() || - TID.hasUnmodeledSideEffects()) { + if (!isSafeToDelete(*LastUDMI)) { LastUD->setIsDead(); break; } @@ -1375,8 +1564,8 @@ private: AvailableSpills &Spills, BitVector &RegKills, std::vector<MachineOperand*> &KillOps) { - DOUT << "\n**** Local spiller rewriting MBB '" - << MBB.getBasicBlock()->getName() << "':\n"; + DEBUG(errs() << "\n**** Local spiller rewriting MBB '" + << MBB.getBasicBlock()->getName() << "':\n"); MachineFunction &MF = *MBB.getParent(); @@ -1425,15 +1614,23 @@ private: assert(RC && "Unable to determine register class!"); int SS = VRM.getEmergencySpillSlot(RC); if (UsedSS.count(SS)) - assert(0 && "Need to spill more than one physical registers!"); + llvm_unreachable("Need to spill more than one physical registers!"); UsedSS.insert(SS); TII->storeRegToStackSlot(MBB, MII, PhysReg, true, SS, RC); MachineInstr *StoreMI = prior(MII); VRM.addSpillSlotUse(SS, StoreMI); - TII->loadRegFromStackSlot(MBB, next(MII), PhysReg, SS, RC); - MachineInstr *LoadMI = next(MII); + + // Back-schedule reloads and remats. + MachineBasicBlock::iterator InsertLoc = + ComputeReloadLoc(next(MII), MBB.begin(), PhysReg, TRI, false, + SS, TII, MF); + + TII->loadRegFromStackSlot(MBB, InsertLoc, PhysReg, SS, RC); + + MachineInstr *LoadMI = prior(InsertLoc); VRM.addSpillSlotUse(SS, LoadMI); ++NumPSpills; + DistanceMap.insert(std::make_pair(LoadMI, Dist++)); } NextMII = next(MII); } @@ -1467,28 +1664,36 @@ private: // If the value is already available in the expected register, save // a reload / remat. 
if (SSorRMId) - DOUT << "Reusing RM#" << SSorRMId-VirtRegMap::MAX_STACK_SLOT-1; + DEBUG(errs() << "Reusing RM#" + << SSorRMId-VirtRegMap::MAX_STACK_SLOT-1); else - DOUT << "Reusing SS#" << SSorRMId; - DOUT << " from physreg " - << TRI->getName(InReg) << " for vreg" - << VirtReg <<" instead of reloading into physreg " - << TRI->getName(Phys) << "\n"; + DEBUG(errs() << "Reusing SS#" << SSorRMId); + DEBUG(errs() << " from physreg " + << TRI->getName(InReg) << " for vreg" + << VirtReg <<" instead of reloading into physreg " + << TRI->getName(Phys) << '\n'); ++NumOmitted; continue; } else if (InReg && InReg != Phys) { if (SSorRMId) - DOUT << "Reusing RM#" << SSorRMId-VirtRegMap::MAX_STACK_SLOT-1; + DEBUG(errs() << "Reusing RM#" + << SSorRMId-VirtRegMap::MAX_STACK_SLOT-1); else - DOUT << "Reusing SS#" << SSorRMId; - DOUT << " from physreg " - << TRI->getName(InReg) << " for vreg" - << VirtReg <<" by copying it into physreg " - << TRI->getName(Phys) << "\n"; + DEBUG(errs() << "Reusing SS#" << SSorRMId); + DEBUG(errs() << " from physreg " + << TRI->getName(InReg) << " for vreg" + << VirtReg <<" by copying it into physreg " + << TRI->getName(Phys) << '\n'); // If the reloaded / remat value is available in another register, // copy it to the desired register. - TII->copyRegToReg(MBB, &MI, Phys, InReg, RC, RC); + + // Back-schedule reloads and remats. + MachineBasicBlock::iterator InsertLoc = + ComputeReloadLoc(MII, MBB.begin(), Phys, TRI, DoReMat, + SSorRMId, TII, MF); + + TII->copyRegToReg(MBB, InsertLoc, Phys, InReg, RC, RC); // This invalidates Phys. Spills.ClobberPhysReg(Phys); @@ -1496,24 +1701,30 @@ private: Spills.addAvailable(SSorRMId, Phys); // Mark is killed. 
- MachineInstr *CopyMI = prior(MII); + MachineInstr *CopyMI = prior(InsertLoc); MachineOperand *KillOpnd = CopyMI->findRegisterUseOperand(InReg); KillOpnd->setIsKill(); UpdateKills(*CopyMI, TRI, RegKills, KillOps); - DOUT << '\t' << *CopyMI; + DEBUG(errs() << '\t' << *CopyMI); ++NumCopified; continue; } + // Back-schedule reloads and remats. + MachineBasicBlock::iterator InsertLoc = + ComputeReloadLoc(MII, MBB.begin(), Phys, TRI, DoReMat, + SSorRMId, TII, MF); + if (VRM.isReMaterialized(VirtReg)) { - ReMaterialize(MBB, MII, Phys, VirtReg, TII, TRI, VRM); + ReMaterialize(MBB, InsertLoc, Phys, VirtReg, TII, TRI, VRM); } else { const TargetRegisterClass* RC = RegInfo->getRegClass(VirtReg); - TII->loadRegFromStackSlot(MBB, &MI, Phys, SSorRMId, RC); - MachineInstr *LoadMI = prior(MII); + TII->loadRegFromStackSlot(MBB, InsertLoc, Phys, SSorRMId, RC); + MachineInstr *LoadMI = prior(InsertLoc); VRM.addSpillSlotUse(SSorRMId, LoadMI); ++NumLoads; + DistanceMap.insert(std::make_pair(LoadMI, Dist++)); } // This invalidates Phys. @@ -1521,8 +1732,8 @@ private: // Remember it's available. Spills.addAvailable(SSorRMId, Phys); - UpdateKills(*prior(MII), TRI, RegKills, KillOps); - DOUT << '\t' << *prior(MII); + UpdateKills(*prior(InsertLoc), TRI, RegKills, KillOps); + DEBUG(errs() << '\t' << *prior(MII)); } } @@ -1541,7 +1752,7 @@ private: TII->storeRegToStackSlot(MBB, next(MII), Phys, isKill, StackSlot, RC); MachineInstr *StoreMI = next(MII); VRM.addSpillSlotUse(StackSlot, StoreMI); - DOUT << "Store:\t" << *StoreMI; + DEBUG(errs() << "Store:\t" << *StoreMI); VRM.virtFolded(VirtReg, StoreMI, VirtRegMap::isMod); } NextMII = next(MII); @@ -1660,13 +1871,14 @@ private: if (CanReuse) { // If this stack slot value is already available, reuse it! 
if (ReuseSlot > VirtRegMap::MAX_STACK_SLOT) - DOUT << "Reusing RM#" << ReuseSlot-VirtRegMap::MAX_STACK_SLOT-1; + DEBUG(errs() << "Reusing RM#" + << ReuseSlot-VirtRegMap::MAX_STACK_SLOT-1); else - DOUT << "Reusing SS#" << ReuseSlot; - DOUT << " from physreg " - << TRI->getName(PhysReg) << " for vreg" - << VirtReg <<" instead of reloading into physreg " - << TRI->getName(VRM.getPhys(VirtReg)) << "\n"; + DEBUG(errs() << "Reusing SS#" << ReuseSlot); + DEBUG(errs() << " from physreg " + << TRI->getName(PhysReg) << " for vreg" + << VirtReg <<" instead of reloading into physreg " + << TRI->getName(VRM.getPhys(VirtReg)) << '\n'); unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg; MI.getOperand(i).setReg(RReg); MI.getOperand(i).setSubReg(0); @@ -1733,20 +1945,22 @@ private: // available. If this occurs, use the register indicated by the // reuser. if (ReusedOperands.hasReuses()) - DesignatedReg = ReusedOperands.GetRegForReload(DesignatedReg, &MI, - Spills, MaybeDeadStores, RegKills, KillOps, VRM); + DesignatedReg = ReusedOperands.GetRegForReload(VirtReg, + DesignatedReg, &MI, + Spills, MaybeDeadStores, RegKills, KillOps, VRM); // If the mapped designated register is actually the physreg we have // incoming, we don't need to inserted a dead copy. if (DesignatedReg == PhysReg) { // If this stack slot value is already available, reuse it! if (ReuseSlot > VirtRegMap::MAX_STACK_SLOT) - DOUT << "Reusing RM#" << ReuseSlot-VirtRegMap::MAX_STACK_SLOT-1; + DEBUG(errs() << "Reusing RM#" + << ReuseSlot-VirtRegMap::MAX_STACK_SLOT-1); else - DOUT << "Reusing SS#" << ReuseSlot; - DOUT << " from physreg " << TRI->getName(PhysReg) - << " for vreg" << VirtReg - << " instead of reloading into same physreg.\n"; + DEBUG(errs() << "Reusing SS#" << ReuseSlot); + DEBUG(errs() << " from physreg " << TRI->getName(PhysReg) + << " for vreg" << VirtReg + << " instead of reloading into same physreg.\n"); unsigned RReg = SubIdx ? 
TRI->getSubReg(PhysReg, SubIdx) : PhysReg; MI.getOperand(i).setReg(RReg); MI.getOperand(i).setSubReg(0); @@ -1758,9 +1972,15 @@ private: const TargetRegisterClass* RC = RegInfo->getRegClass(VirtReg); RegInfo->setPhysRegUsed(DesignatedReg); ReusedOperands.markClobbered(DesignatedReg); - TII->copyRegToReg(MBB, &MI, DesignatedReg, PhysReg, RC, RC); - MachineInstr *CopyMI = prior(MII); + // Back-schedule reloads and remats. + MachineBasicBlock::iterator InsertLoc = + ComputeReloadLoc(&MI, MBB.begin(), PhysReg, TRI, DoReMat, + SSorRMId, TII, MF); + + TII->copyRegToReg(MBB, InsertLoc, DesignatedReg, PhysReg, RC, RC); + + MachineInstr *CopyMI = prior(InsertLoc); UpdateKills(*CopyMI, TRI, RegKills, KillOps); // This invalidates DesignatedReg. @@ -1771,7 +1991,7 @@ private: SubIdx ? TRI->getSubReg(DesignatedReg, SubIdx) : DesignatedReg; MI.getOperand(i).setReg(RReg); MI.getOperand(i).setSubReg(0); - DOUT << '\t' << *prior(MII); + DEBUG(errs() << '\t' << *prior(MII)); ++NumReused; continue; } // if (PhysReg) @@ -1785,22 +2005,28 @@ private: // available. If this occurs, use the register indicated by the // reuser. if (ReusedOperands.hasReuses()) - PhysReg = ReusedOperands.GetRegForReload(PhysReg, &MI, - Spills, MaybeDeadStores, RegKills, KillOps, VRM); + PhysReg = ReusedOperands.GetRegForReload(VirtReg, PhysReg, &MI, + Spills, MaybeDeadStores, RegKills, KillOps, VRM); RegInfo->setPhysRegUsed(PhysReg); ReusedOperands.markClobbered(PhysReg); if (AvoidReload) ++NumAvoided; else { + // Back-schedule reloads and remats. 
+ MachineBasicBlock::iterator InsertLoc = + ComputeReloadLoc(MII, MBB.begin(), PhysReg, TRI, DoReMat, + SSorRMId, TII, MF); + if (DoReMat) { - ReMaterialize(MBB, MII, PhysReg, VirtReg, TII, TRI, VRM); + ReMaterialize(MBB, InsertLoc, PhysReg, VirtReg, TII, TRI, VRM); } else { const TargetRegisterClass* RC = RegInfo->getRegClass(VirtReg); - TII->loadRegFromStackSlot(MBB, &MI, PhysReg, SSorRMId, RC); - MachineInstr *LoadMI = prior(MII); + TII->loadRegFromStackSlot(MBB, InsertLoc, PhysReg, SSorRMId, RC); + MachineInstr *LoadMI = prior(InsertLoc); VRM.addSpillSlotUse(SSorRMId, LoadMI); ++NumLoads; + DistanceMap.insert(std::make_pair(LoadMI, Dist++)); } // This invalidates PhysReg. Spills.ClobberPhysReg(PhysReg); @@ -1817,8 +2043,8 @@ private: KilledMIRegs.insert(VirtReg); } - UpdateKills(*prior(MII), TRI, RegKills, KillOps); - DOUT << '\t' << *prior(MII); + UpdateKills(*prior(InsertLoc), TRI, RegKills, KillOps); + DEBUG(errs() << '\t' << *prior(InsertLoc)); } unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg; MI.getOperand(i).setReg(RReg); @@ -1832,7 +2058,7 @@ private: int PDSSlot = PotentialDeadStoreSlots[j]; MachineInstr* DeadStore = MaybeDeadStores[PDSSlot]; if (DeadStore) { - DOUT << "Removed dead store:\t" << *DeadStore; + DEBUG(errs() << "Removed dead store:\t" << *DeadStore); InvalidateKills(*DeadStore, TRI, RegKills, KillOps); VRM.RemoveMachineInstrFromMaps(DeadStore); MBB.erase(DeadStore); @@ -1842,7 +2068,7 @@ private: } - DOUT << '\t' << MI; + DEBUG(errs() << '\t' << MI); // If we have folded references to memory operands, make sure we clear all @@ -1852,7 +2078,7 @@ private: for (tie(I, End) = VRM.getFoldedVirts(&MI); I != End; ) { unsigned VirtReg = I->second.first; VirtRegMap::ModRef MR = I->second.second; - DOUT << "Folded vreg: " << VirtReg << " MR: " << MR; + DEBUG(errs() << "Folded vreg: " << VirtReg << " MR: " << MR); // MI2VirtMap be can updated which invalidate the iterator. // Increment the iterator first. 
@@ -1861,7 +2087,7 @@ private: if (SS == VirtRegMap::NO_STACK_SLOT) continue; FoldedSS.insert(SS); - DOUT << " - StackSlot: " << SS << "\n"; + DEBUG(errs() << " - StackSlot: " << SS << "\n"); // If this folded instruction is just a use, check to see if it's a // straight load from the virt reg slot. @@ -1872,7 +2098,7 @@ private: // If this spill slot is available, turn it into a copy (or nothing) // instead of leaving it as a load! if (unsigned InReg = Spills.getSpillSlotOrReMatPhysReg(SS)) { - DOUT << "Promoted Load To Copy: " << MI; + DEBUG(errs() << "Promoted Load To Copy: " << MI); if (DestReg != InReg) { const TargetRegisterClass *RC = RegInfo->getRegClass(VirtReg); TII->copyRegToReg(MBB, &MI, DestReg, InReg, RC, RC); @@ -1895,7 +2121,7 @@ private: BackTracked = true; } else { - DOUT << "Removing now-noop copy: " << MI; + DEBUG(errs() << "Removing now-noop copy: " << MI); // Unset last kill since it's being reused. InvalidateKill(InReg, TRI, RegKills, KillOps); Spills.disallowClobberPhysReg(InReg); @@ -1965,7 +2191,7 @@ private: if (isDead) { // Previous store is dead. // If we get here, the store is dead, nuke it now. - DOUT << "Removed dead store:\t" << *DeadStore; + DEBUG(errs() << "Removed dead store:\t" << *DeadStore); InvalidateKills(*DeadStore, TRI, RegKills, KillOps); VRM.RemoveMachineInstrFromMaps(DeadStore); MBB.erase(DeadStore); @@ -2036,7 +2262,7 @@ private: if (TII->isMoveInstr(MI, Src, Dst, SrcSR, DstSR) && Src == Dst && !MI.findRegisterUseOperand(Src)->isUndef()) { ++NumDCE; - DOUT << "Removing now-noop copy: " << MI; + DEBUG(errs() << "Removing now-noop copy: " << MI); SmallVector<unsigned, 2> KillRegs; InvalidateKills(MI, TRI, RegKills, KillOps, &KillRegs); if (MO.isDead() && !KillRegs.empty()) { @@ -2100,8 +2326,8 @@ private: if (ReusedOperands.isClobbered(PhysReg)) { // Another def has taken the assigned physreg. It must have been a // use&def which got it due to reuse. Undo the reuse! 
- PhysReg = ReusedOperands.GetRegForReload(PhysReg, &MI, - Spills, MaybeDeadStores, RegKills, KillOps, VRM); + PhysReg = ReusedOperands.GetRegForReload(VirtReg, PhysReg, &MI, + Spills, MaybeDeadStores, RegKills, KillOps, VRM); } } @@ -2124,7 +2350,7 @@ private: unsigned Src, Dst, SrcSR, DstSR; if (TII->isMoveInstr(MI, Src, Dst, SrcSR, DstSR) && Src == Dst) { ++NumDCE; - DOUT << "Removing now-noop copy: " << MI; + DEBUG(errs() << "Removing now-noop copy: " << MI); InvalidateKills(MI, TRI, RegKills, KillOps); VRM.RemoveMachineInstrFromMaps(&MI); MBB.erase(&MI); @@ -2136,7 +2362,15 @@ private: } } ProcessNextInst: - DistanceMap.insert(std::make_pair(&MI, Dist++)); + // Delete dead instructions without side effects. + if (!Erased && !BackTracked && isSafeToDelete(MI)) { + InvalidateKills(MI, TRI, RegKills, KillOps); + VRM.RemoveMachineInstrFromMaps(&MI); + MBB.erase(&MI); + Erased = true; + } + if (!Erased) + DistanceMap.insert(std::make_pair(&MI, Dist++)); if (!Erased && !BackTracked) { for (MachineBasicBlock::iterator II = &MI; II != NextMII; ++II) UpdateKills(*II, TRI, RegKills, KillOps); @@ -2148,9 +2382,11 @@ private: }; +} + llvm::VirtRegRewriter* llvm::createVirtRegRewriter() { switch (RewriterOpt) { - default: assert(0 && "Unreachable!"); + default: llvm_unreachable("Unreachable!"); case local: return new LocalRewriter(); case trivial: diff --git a/lib/CodeGen/VirtRegRewriter.h b/lib/CodeGen/VirtRegRewriter.h index f9d7fbb..44f9df6 100644 --- a/lib/CodeGen/VirtRegRewriter.h +++ b/lib/CodeGen/VirtRegRewriter.h @@ -10,27 +10,9 @@ #ifndef LLVM_CODEGEN_VIRTREGREWRITER_H #define LLVM_CODEGEN_VIRTREGREWRITER_H -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/ADT/BitVector.h" -#include "llvm/ADT/IndexedMap.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/Support/Streams.h" -#include "llvm/Function.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" -#include "llvm/CodeGen/MachineFrameInfo.h" #include 
"llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/ADT/BitVector.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallSet.h" #include "VirtRegMap.h" -#include <map> namespace llvm { |