From 02fb33730c346d6a785d935d7aba9af93ba51da6 Mon Sep 17 00:00:00 2001
From: rdivacky <rdivacky@FreeBSD.org>
Date: Thu, 19 Nov 2009 08:59:28 +0000
Subject: Update LLVM to r89337.

---
 CMakeLists.txt                                  |   2 +-
 Makefile.rules                                  |  49 +++-
 autoconf/config.guess                           |   4 +
 autoconf/config.sub                             |   5 +-
 include/llvm/ADT/Twine.h                        |   8 +-
 include/llvm/CodeGen/LiveIntervalAnalysis.h     |  10 -
 include/llvm/CodeGen/MachineFunction.h          |   2 +-
 include/llvm/Support/MemoryBuffer.h             |   2 +-
 include/llvm/Target/SubtargetFeature.h          |   5 +
 include/llvm/Transforms/Utils/Local.h           |   9 +-
 lib/Analysis/IVUsers.cpp                        |  10 +
 lib/CodeGen/AsmPrinter/DwarfException.cpp       |  35 ++-
 lib/CodeGen/BranchFolding.cpp                   |  20 +-
 lib/CodeGen/LiveIntervalAnalysis.cpp            | 137 -----------
 lib/CodeGen/MachineVerifier.cpp                 | 177 +++++++++++++--
 lib/CodeGen/PHIElimination.cpp                  |  34 +--
 lib/CodeGen/PHIElimination.h                    |   3 +-
 lib/CodeGen/RegAllocLinearScan.cpp              |  21 +-
 lib/CodeGen/Spiller.cpp                         | 287 +++++++-----------------
 lib/CodeGen/Spiller.h                           |  12 +-
 lib/CodeGen/TwoAddressInstructionPass.cpp       |  17 +-
 lib/CodeGen/VirtRegRewriter.cpp                 |  31 ++-
 lib/Target/ARM/ARM.td                           |  18 +-
 lib/Target/ARM/ARMISelDAGToDAG.cpp              |   3 +-
 lib/Target/ARM/ARMInstrInfo.td                  |   4 +-
 lib/Target/ARM/ARMInstrThumb.td                 |  25 ++-
 lib/Target/ARM/ARMScheduleV6.td                 | 188 +++++++++++++++-
 lib/Target/ARM/ARMScheduleV7.td                 |   8 +-
 lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp     |  13 +-
 lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp    |   5 +
 lib/Target/ARM/AsmPrinter/ARMInstPrinter.h      |   3 +-
 lib/Target/ARM/Thumb2SizeReduction.cpp          |  34 ++-
 lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp   |   2 +
 lib/Target/Mips/MipsISelDAGToDAG.cpp            | 129 +++++++++++
 lib/Target/Mips/MipsRegisterInfo.h              |   9 +
 lib/Target/Mips/MipsRegisterInfo.td             |  28 ++-
 lib/Target/Mips/MipsTargetObjectFile.cpp        |   7 +
 lib/Target/SubtargetFeature.cpp                 |  27 +++
 lib/Target/TargetLoweringObjectFile.cpp         |   4 +-
 lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp |   2 +
 lib/Target/XCore/XCoreISelLowering.cpp          |  15 +-
 lib/Target/XCore/XCoreISelLowering.h            |   1 +
 lib/Target/XCore/XCoreInstrInfo.td              |   6 +
 lib/Transforms/Utils/LoopSimplify.cpp           |   6 +
 lib/Transforms/Utils/SimplifyCFG.cpp            |   2 +-
 test/CodeGen/ARM/tail-opts.ll                   |  64 ++++++
 test/CodeGen/Mips/2008-08-03-ReturnDouble.ll    |   1 -
 test/CodeGen/Thumb/pop.ll                       |   2 +-
 test/CodeGen/Thumb2/2009-07-21-ISelBug.ll       |   2 +-
 test/CodeGen/Thumb2/2009-08-06-SpDecBug.ll      |   2 +-
 test/CodeGen/Thumb2/large-stack.ll              |   6 +-
 test/CodeGen/Thumb2/ldr-str-imm12.ll            |   6 +
 test/CodeGen/X86/2009-11-18-TwoAddrKill.ll      |  29 +++
 test/CodeGen/X86/unaligned-load.ll              |  28 +++
 test/CodeGen/XCore/indirectbr.ll                |  45 ++++
 test/FrontendC++/2009-07-15-LineNumbers.cpp     |   2 +-
 tools/lto/LTOCodeGenerator.cpp                  |   3 +-
 tools/lto/LTOModule.cpp                         |  25 +--
 tools/lto/LTOModule.h                           |   2 -
 utils/TableGen/CMakeLists.txt                   |   1 +
 utils/TableGen/OptParserEmitter.cpp             | 192 ++++++++++++++++
 utils/TableGen/OptParserEmitter.h               |  34 +++
 utils/TableGen/TableGen.cpp                     |  32 ++-
 utils/lit/TestFormats.py                        |   5 +-
 64 files changed, 1338 insertions(+), 562 deletions(-)
 create mode 100644 test/CodeGen/ARM/tail-opts.ll
 create mode 100644 test/CodeGen/X86/2009-11-18-TwoAddrKill.ll
 create mode 100644 test/CodeGen/X86/unaligned-load.ll
 create mode 100644 test/CodeGen/XCore/indirectbr.ll
 create mode 100644 utils/TableGen/OptParserEmitter.cpp
 create mode 100644 utils/TableGen/OptParserEmitter.h

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 839def7..0021478 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -323,7 +323,7 @@ if(LLVM_BUILD_TOOLS)
   add_subdirectory(tools)
 endif()
 
-option(LLVM_BUILD_EXAMPLES "Build LLVM example programs." ON)
+option(LLVM_BUILD_EXAMPLES "Build LLVM example programs." OFF)
 if(LLVM_BUILD_EXAMPLES)
   add_subdirectory(examples)
 endif ()
diff --git a/Makefile.rules b/Makefile.rules
index 6bda564..d9b2108 100644
--- a/Makefile.rules
+++ b/Makefile.rules
@@ -736,6 +736,8 @@ else
 Ranlib        = ranlib
 endif
 
+AliasTool     = ln -s
+
 #----------------------------------------------------------
 # Get the list of source files and compute object file
 # names from them.
@@ -1215,10 +1217,20 @@ ifdef TOOLNAME
 #---------------------------------------------------------
 # Set up variables for building a tool.
 #---------------------------------------------------------
+TOOLEXENAME := $(strip $(TOOLNAME))$(EXEEXT)
 ifdef EXAMPLE_TOOL
-ToolBuildPath   := $(ExmplDir)/$(strip $(TOOLNAME))$(EXEEXT)
+ToolBuildPath   := $(ExmplDir)/$(TOOLEXENAME)
 else
-ToolBuildPath   := $(ToolDir)/$(strip $(TOOLNAME))$(EXEEXT)
+ToolBuildPath   := $(ToolDir)/$(TOOLEXENAME)
+endif
+
+# TOOLALIAS, if set, is an extra name for the tool, made with $(AliasTool).
+ifdef TOOLALIAS
+ifdef EXAMPLE_TOOL
+ToolAliasBuildPath   := $(ExmplDir)/$(strip $(TOOLALIAS))$(EXEEXT)
+else
+ToolAliasBuildPath   := $(ToolDir)/$(strip $(TOOLALIAS))$(EXEEXT)
+endif # EXAMPLE_TOOL
 endif
 
 #---------------------------------------------------------
@@ -1246,12 +1258,15 @@ endif
 #---------------------------------------------------------
 # Provide targets for building the tools
 #---------------------------------------------------------
-all-local:: $(ToolBuildPath)
+all-local:: $(ToolBuildPath) $(ToolAliasBuildPath)
 
 clean-local::
 ifneq ($(strip $(ToolBuildPath)),)
 	-$(Verb) $(RM) -f $(ToolBuildPath)
 endif
+ifneq ($(strip $(ToolAliasBuildPath)),)
+	-$(Verb) $(RM) -f $(ToolAliasBuildPath)
+endif
 
 ifdef EXAMPLE_TOOL
 $(ToolBuildPath): $(ExmplDir)/.dir
@@ -1266,13 +1281,22 @@ $(ToolBuildPath): $(ObjectsO) $(ProjLibsPaths) $(LLVMLibsPaths)
 	$(Echo) ======= Finished Linking $(BuildMode) Executable $(TOOLNAME) \
           $(StripWarnMsg)
 
+ifneq ($(strip $(ToolAliasBuildPath)),)
+$(ToolAliasBuildPath): $(ToolBuildPath)
+	$(Echo) Creating $(BuildMode) Alias $(TOOLALIAS) $(StripWarnMsg)
+	$(Verb) $(RM) -f $(ToolAliasBuildPath)
+	$(Verb) $(AliasTool) $(TOOLEXENAME) $(ToolAliasBuildPath)
+	$(Echo) ======= Finished Creating $(BuildMode) Alias $(TOOLALIAS) \
+          $(StripWarnMsg)
+endif # ToolAliasBuildPath
+
 ifdef NO_INSTALL
 install-local::
 	$(Echo) Install circumvented with NO_INSTALL
 uninstall-local::
 	$(Echo) Uninstall circumvented with NO_INSTALL
 else
-DestTool = $(PROJ_bindir)/$(TOOLNAME)$(EXEEXT)
+DestTool = $(PROJ_bindir)/$(TOOLEXENAME)
 
 install-local:: $(DestTool)
 
@@ -1283,6 +1307,23 @@ $(DestTool): $(ToolBuildPath) $(PROJ_bindir)
 uninstall-local::
 	$(Echo) Uninstalling $(BuildMode) $(DestTool)
 	-$(Verb) $(RM) -f $(DestTool)
+
+# TOOLALIAS install/uninstall: the alias is created inside $(PROJ_bindir).
+ifdef TOOLALIAS
+DestToolAlias = $(PROJ_bindir)/$(strip $(TOOLALIAS))$(EXEEXT)
+
+install-local:: $(DestToolAlias)
+
+$(DestToolAlias): $(DestTool) $(PROJ_bindir)
+	$(Echo) Installing $(BuildMode) $(DestToolAlias)
+	$(Verb) $(RM) -f $(DestToolAlias)
+	$(Verb) $(AliasTool) $(TOOLEXENAME) $(DestToolAlias)
+
+uninstall-local::
+	$(Echo) Uninstalling $(BuildMode) $(DestToolAlias)
+	-$(Verb) $(RM) -f $(DestToolAlias)
+endif # TOOLALIAS
+
 endif
 endif
 
diff --git a/autoconf/config.guess b/autoconf/config.guess
index e792aac..865fe53 100755
--- a/autoconf/config.guess
+++ b/autoconf/config.guess
@@ -333,6 +333,10 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
     sun4*:SunOS:5.*:* | tadpole*:SunOS:5.*:*)
 	echo sparc-sun-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
 	exit ;;
+    i86pc:AuroraUX:5.*:* | i86xen:AuroraUX:5.*:*)
+	AUX_ARCH="i386"
+	echo ${AUX_ARCH}-pc-auroraux`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
+	exit ;;
     i86pc:SunOS:5.*:* | i86xen:SunOS:5.*:*)
 	eval $set_cc_for_build
 	SUN_ARCH="i386"
diff --git a/autoconf/config.sub b/autoconf/config.sub
index 8ca084b..183976a 100755
--- a/autoconf/config.sub
+++ b/autoconf/config.sub
@@ -1256,6 +1256,9 @@ case $os in
 	-solaris1 | -solaris1.*)
 		os=`echo $os | sed -e 's|solaris1|sunos4|'`
 		;;
+	-auroraux)
+		os=-auroraux
+		;;
 	-solaris)
 		os=-solaris2
 		;;
@@ -1274,7 +1277,7 @@ case $os in
 	# -sysv* is not here because it comes later, after sysvr4.
 	-gnu* | -bsd* | -mach* | -minix* | -genix* | -ultrix* | -irix* \
 	      | -*vms* | -sco* | -esix* | -isc* | -aix* | -cnk* | -sunos | -sunos[34]*\
-	      | -hpux* | -unos* | -osf* | -luna* | -dgux* | -solaris* | -sym* \
+	      | -hpux* | -unos* | -osf* | -luna* | -dgux* | -auroraux* | -solaris* | -sym* \
 	      | -kopensolaris* \
 	      | -amigaos* | -amigados* | -msdos* | -newsos* | -unicos* | -aof* \
 	      | -aos* | -aros* \
diff --git a/include/llvm/ADT/Twine.h b/include/llvm/ADT/Twine.h
index f4722db..ca0be53 100644
--- a/include/llvm/ADT/Twine.h
+++ b/include/llvm/ADT/Twine.h
@@ -133,9 +133,9 @@ namespace llvm {
     /// Null or Empty kinds.
     const void *RHS;
     /// LHSKind - The NodeKind of the left hand side, \see getLHSKind().
-    NodeKind LHSKind : 8;
+    unsigned char LHSKind;
     /// RHSKind - The NodeKind of the left hand side, \see getLHSKind().
-    NodeKind RHSKind : 8;
+    unsigned char RHSKind;
 
   private:
     /// Construct a nullary twine; the kind must be NullKind or EmptyKind.
@@ -209,10 +209,10 @@ namespace llvm {
     }
 
     /// getLHSKind - Get the NodeKind of the left-hand side.
-    NodeKind getLHSKind() const { return LHSKind; }
+    NodeKind getLHSKind() const { return (NodeKind) LHSKind; }
 
     /// getRHSKind - Get the NodeKind of the left-hand side.
-    NodeKind getRHSKind() const { return RHSKind; }
+    NodeKind getRHSKind() const { return (NodeKind) RHSKind; }
 
     /// printOneChild - Print one child from a twine.
     void printOneChild(raw_ostream &OS, const void *Ptr, NodeKind Kind) const;
diff --git a/include/llvm/CodeGen/LiveIntervalAnalysis.h b/include/llvm/CodeGen/LiveIntervalAnalysis.h
index cf768c3..7a02d0f 100644
--- a/include/llvm/CodeGen/LiveIntervalAnalysis.h
+++ b/include/llvm/CodeGen/LiveIntervalAnalysis.h
@@ -60,9 +60,6 @@ namespace llvm {
     typedef DenseMap<unsigned, LiveInterval*> Reg2IntervalMap;
     Reg2IntervalMap r2iMap_;
 
-    /// phiJoinCopies - Copy instructions which are PHI joins.
-    SmallVector<MachineInstr*, 16> phiJoinCopies;
-
     /// allocatableRegs_ - A bit vector of allocatable registers.
     BitVector allocatableRegs_;
 
@@ -278,13 +275,6 @@ namespace llvm {
     /// computeIntervals - Compute live intervals.
     void computeIntervals();
 
-    bool isSafeAndProfitableToCoalesce(LiveInterval &DstInt,
-                                       LiveInterval &SrcInt,
-                 SmallVector<MachineInstr*,16> &IdentCopies,
-                 SmallVector<MachineInstr*,16> &OtherCopies);
-
-    void performEarlyCoalescing();
-
     /// handleRegisterDef - update intervals for a register def
     /// (calls handlePhysicalRegisterDef and
     /// handleVirtualRegisterDef)
diff --git a/include/llvm/CodeGen/MachineFunction.h b/include/llvm/CodeGen/MachineFunction.h
index d2f5224..f1bfa01 100644
--- a/include/llvm/CodeGen/MachineFunction.h
+++ b/include/llvm/CodeGen/MachineFunction.h
@@ -233,7 +233,7 @@ public:
 
   /// verify - Run the current MachineFunction through the machine code
   /// verifier, useful for debugger use.
-  void verify() const;
+  void verify(Pass *p=NULL, bool allowDoubleDefs=false) const;
 
   // Provide accessors for the MachineBasicBlock list...
   typedef BasicBlockListType::iterator iterator;
diff --git a/include/llvm/Support/MemoryBuffer.h b/include/llvm/Support/MemoryBuffer.h
index 95d0d3d..65c7167 100644
--- a/include/llvm/Support/MemoryBuffer.h
+++ b/include/llvm/Support/MemoryBuffer.h
@@ -24,7 +24,7 @@ namespace llvm {
 /// of memory, and provides simple methods for reading files and standard input
 /// into a memory buffer.  In addition to basic access to the characters in the
 /// file, this interface guarantees you can read one character past the end of
-/// @verbatim the file, and that this character will read as '\0'. @endverbatim
+/// the file, and that this character will read as '\0'.
 class MemoryBuffer {
   const char *BufferStart; // Start of the buffer.
   const char *BufferEnd;   // End of the buffer.
diff --git a/include/llvm/Target/SubtargetFeature.h b/include/llvm/Target/SubtargetFeature.h
index a709f52..38a3cc2 100644
--- a/include/llvm/Target/SubtargetFeature.h
+++ b/include/llvm/Target/SubtargetFeature.h
@@ -21,6 +21,7 @@
 #include <string>
 #include <vector>
 #include <cstring>
+#include "llvm/ADT/Triple.h"
 #include "llvm/System/DataTypes.h"
 
 namespace llvm {
@@ -106,6 +107,10 @@ public:
   
   // Dump feature info.
   void dump() const;
+
+  /// Retrieve a formatted string of the default features for
+  /// the specified target triple.
+  static std::string getDefaultSubtargetFeatures(const Triple &Triple);
 };
 
 } // End namespace llvm
diff --git a/include/llvm/Transforms/Utils/Local.h b/include/llvm/Transforms/Utils/Local.h
index 292af1d..e6687bb 100644
--- a/include/llvm/Transforms/Utils/Local.h
+++ b/include/llvm/Transforms/Utils/Local.h
@@ -107,7 +107,14 @@ void MergeBasicBlockIntoOnlyPred(BasicBlock *BB, Pass *P = 0);
 /// rewriting all the predecessors to branch to the successor block and return
 /// true.  If we can't transform, return false.
 bool TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB);
-    
+
+/// EliminateDuplicatePHINodes - Check for and eliminate duplicate PHI
+/// nodes in this block. This doesn't try to be clever about PHI nodes
+/// which differ only in the order of the incoming values, but instcombine
+/// orders them so it usually won't matter.
+///
+bool EliminateDuplicatePHINodes(BasicBlock *BB);
+
 /// SimplifyCFG - This function is used to do simplification of a CFG.  For
 /// example, it adjusts branches to branches to eliminate the extra hop, it
 /// eliminates unreachable basic blocks, and does other "peephole" optimization
diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp
index cf52320..efe40e4 100644
--- a/lib/Analysis/IVUsers.cpp
+++ b/lib/Analysis/IVUsers.cpp
@@ -24,6 +24,7 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/CommandLine.h"
 #include <algorithm>
 using namespace llvm;
 
@@ -31,6 +32,10 @@ char IVUsers::ID = 0;
 static RegisterPass<IVUsers>
 X("iv-users", "Induction Variable Users", false, true);
 
+static cl::opt<bool>
+SimplifyIVUsers("simplify-iv-users", cl::Hidden, cl::init(false),
+          cl::desc("Restrict IV Users to loop-invariant strides"));
+
 Pass *llvm::createIVUsersPass() {
   return new IVUsers();
 }
@@ -208,6 +213,11 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) {
   if (!getSCEVStartAndStride(ISE, L, UseLoop, Start, Stride, SE, DT))
     return false;  // Non-reducible symbolic expression, bail out.
 
+  // Keep things simple. Don't touch loop-variant strides.
+  if (SimplifyIVUsers && !Stride->isLoopInvariant(L)
+      && L->contains(I->getParent()))
+    return false;
+
   SmallPtrSet<Instruction *, 4> UniqueUsers;
   for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
        UI != E; ++UI) {
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.cpp b/lib/CodeGen/AsmPrinter/DwarfException.cpp
index 1c8b8f4..fcdcfd3 100644
--- a/lib/CodeGen/AsmPrinter/DwarfException.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfException.cpp
@@ -727,7 +727,8 @@ void DwarfException::EmitExceptionTable() {
     // somewhere.  This predicate should be moved to a shared location that is
     // in target-independent code.
     //
-    if (LSDASection->getKind().isWriteable() ||
+    if ((LSDASection->getKind().isWriteable() &&
+         !LSDASection->getKind().isReadOnlyWithRel()) ||
         Asm->TM.getRelocationModel() == Reloc::Static)
       TTypeFormat = dwarf::DW_EH_PE_absptr;
     else
@@ -917,14 +918,36 @@ void DwarfException::EmitExceptionTable() {
   }
 
   // Emit the Catch TypeInfos.
+  const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+  unsigned Index = 1;
+
   for (std::vector<GlobalVariable *>::const_reverse_iterator
          I = TypeInfos.rbegin(), E = TypeInfos.rend(); I != E; ++I) {
-    const GlobalVariable *GV = *I;
-    PrintRelDirective();
-
-    if (GV) {
-      O << Asm->Mang->getMangledName(GV);
+    const GlobalVariable *TI = *I;
+
+    if (TI) {
+      if (!LSDASection->getKind().isReadOnlyWithRel() &&
+          (TTypeFormat == dwarf::DW_EH_PE_absptr ||
+           TI->getLinkage() == GlobalValue::InternalLinkage)) {
+        // Print out the unadorned name of the type info.
+        PrintRelDirective();
+        O << Asm->Mang->getMangledName(TI);
+      } else {
+        bool IsTypeInfoIndirect = false, IsTypeInfoPCRel = false;
+        const MCExpr *TypeInfoRef =
+          TLOF.getSymbolForDwarfGlobalReference(TI, Asm->Mang, Asm->MMI,
+                                                IsTypeInfoIndirect,
+                                                IsTypeInfoPCRel);
+
+        if (!IsTypeInfoPCRel)
+          TypeInfoRef = CreateLabelDiff(TypeInfoRef, "typeinforef_addr",
+                                        Index++);
+
+        O << MAI->getData32bitsDirective();
+        TypeInfoRef->print(O, MAI);
+      }
     } else {
+      PrintRelDirective();
       O << "0x0";
     }
 
diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp
index 94bfb72..f807e8f 100644
--- a/lib/CodeGen/BranchFolding.cpp
+++ b/lib/CodeGen/BranchFolding.cpp
@@ -41,8 +41,12 @@ using namespace llvm;
 STATISTIC(NumDeadBlocks, "Number of dead blocks removed");
 STATISTIC(NumBranchOpts, "Number of branches optimized");
 STATISTIC(NumTailMerge , "Number of block tails merged");
+STATISTIC(NumTailDups  , "Number of tail duplicated blocks");
+STATISTIC(NumInstrDups , "Additional instructions due to tail duplication");
+
 static cl::opt<cl::boolOrDefault> FlagEnableTailMerge("enable-tail-merge",
                               cl::init(cl::BOU_UNSET), cl::Hidden);
+
 // Throttle for huge numbers of predecessors (compile speed problems)
 static cl::opt<unsigned>
 TailMergeThreshold("tail-merge-threshold",
@@ -193,7 +197,6 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF,
     MadeChange |= OptimizeImpDefsBlock(MBB);
   }
 
-
   bool MadeChangeThisIteration = true;
   while (MadeChangeThisIteration) {
     MadeChangeThisIteration = false;
@@ -202,10 +205,15 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF,
     MadeChange |= MadeChangeThisIteration;
   }
 
-  // Do tail duplication once after tail merging is done.  Otherwise it is
+  // Do tail duplication after tail merging is done.  Otherwise it is
   // tough to avoid situations where tail duplication and tail merging undo
   // each other's transformations ad infinitum.
-  MadeChange |= TailDuplicateBlocks(MF);
+  MadeChangeThisIteration = true;
+  while (MadeChangeThisIteration) {
+    MadeChangeThisIteration = false;
+    MadeChangeThisIteration |= TailDuplicateBlocks(MF);
+    MadeChange |= MadeChangeThisIteration;
+  }
 
   // See if any jump tables have become mergable or dead as the code generator
   // did its thing.
@@ -1003,9 +1011,6 @@ static bool IsBetterFallthrough(MachineBasicBlock *MBB1,
 bool BranchFolder::TailDuplicateBlocks(MachineFunction &MF) {
   bool MadeChange = false;
 
-  // Make sure blocks are numbered in order
-  MF.RenumberBlocks();
-
   for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ) {
     MachineBasicBlock *MBB = I++;
 
@@ -1017,6 +1022,7 @@ bool BranchFolder::TailDuplicateBlocks(MachineFunction &MF) {
 
     // If it is dead, remove it.
     if (MBB->pred_empty()) {
+      NumInstrDups -= MBB->size();
       RemoveDeadBlock(MBB);
       MadeChange = true;
       ++NumDeadBlocks;
@@ -1097,6 +1103,7 @@ bool BranchFolder::TailDuplicate(MachineBasicBlock *TailBB,
       MachineInstr *NewMI = MF.CloneMachineInstr(I);
       PredBB->insert(PredBB->end(), NewMI);
     }
+    NumInstrDups += TailBB->size() - 1; // subtract one for removed branch
 
     // Update the CFG.
     PredBB->removeSuccessor(PredBB->succ_begin());
@@ -1107,6 +1114,7 @@ bool BranchFolder::TailDuplicate(MachineBasicBlock *TailBB,
        PredBB->addSuccessor(*I);
 
     Changed = true;
+    ++NumTailDups;
   }
 
   // If TailBB was duplicated into all its predecessors except for the prior
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp
index a60d34f..bbfc82b 100644
--- a/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -53,16 +53,9 @@ static cl::opt<bool> DisableReMat("disable-rematerialization",
 static cl::opt<bool> EnableFastSpilling("fast-spill",
                                         cl::init(false), cl::Hidden);
 
-static cl::opt<bool> EarlyCoalescing("early-coalescing",
-                                     cl::init(false), cl::Hidden);
-
-static cl::opt<int> CoalescingLimit("early-coalescing-limit",
-                                    cl::init(-1), cl::Hidden);
-
 STATISTIC(numIntervals , "Number of original intervals");
 STATISTIC(numFolds     , "Number of loads/stores folded into instructions");
 STATISTIC(numSplits    , "Number of intervals split");
-STATISTIC(numCoalescing, "Number of early coalescing performed");
 
 char LiveIntervals::ID = 0;
 static RegisterPass<LiveIntervals> X("liveintervals", "Live Interval Analysis");
@@ -96,7 +89,6 @@ void LiveIntervals::releaseMemory() {
     delete I->second;
   
   r2iMap_.clear();
-  phiJoinCopies.clear();
 
   // Release VNInfo memroy regions after all VNInfo objects are dtor'd.
   VNInfoAllocator.Reset();
@@ -121,7 +113,6 @@ bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) {
   allocatableRegs_ = tri_->getAllocatableSet(fn);
 
   computeIntervals();
-  performEarlyCoalescing();
 
   numIntervals += getNumIntervals();
 
@@ -409,7 +400,6 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
         // Remove the old range that we now know has an incorrect number.
         VNInfo *VNI = interval.getValNumInfo(0);
         MachineInstr *Killer = vi.Kills[0];
-        phiJoinCopies.push_back(Killer);
         SlotIndex Start = getMBBStartIdx(Killer->getParent());
         SlotIndex End = getInstructionIndex(Killer).getDefIndex();
         DEBUG({
@@ -653,133 +643,6 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB,
   DEBUG(errs() << " +" << LR << '\n');
 }
 
-bool LiveIntervals::
-isSafeAndProfitableToCoalesce(LiveInterval &DstInt,
-                              LiveInterval &SrcInt,
-                              SmallVector<MachineInstr*,16> &IdentCopies,
-                              SmallVector<MachineInstr*,16> &OtherCopies) {
-  unsigned NumIdent = 0;
-  for (MachineRegisterInfo::def_iterator ri = mri_->def_begin(SrcInt.reg),
-         re = mri_->def_end(); ri != re; ++ri) {
-    MachineInstr *MI = &*ri;
-    unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
-    if (!tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubReg, DstSubReg))
-      return false;
-    if (SrcReg != DstInt.reg) {
-      // Non-identity copy - we cannot handle overlapping intervals
-      if (DstInt.liveAt(getInstructionIndex(MI)))
-        return false;
-      OtherCopies.push_back(MI);
-    } else {
-      IdentCopies.push_back(MI);
-      ++NumIdent;
-    }
-  }
-
-  return IdentCopies.size() > OtherCopies.size();
-}
-
-void LiveIntervals::performEarlyCoalescing() {
-  if (!EarlyCoalescing)
-    return;
-
-  /// Perform early coalescing: eliminate copies which feed into phi joins
-  /// and whose sources are defined by the phi joins.
-  for (unsigned i = 0, e = phiJoinCopies.size(); i != e; ++i) {
-    MachineInstr *Join = phiJoinCopies[i];
-    if (CoalescingLimit != -1 && (int)numCoalescing == CoalescingLimit)
-      break;
-
-    unsigned PHISrc, PHIDst, SrcSubReg, DstSubReg;
-    bool isMove= tii_->isMoveInstr(*Join, PHISrc, PHIDst, SrcSubReg, DstSubReg);
-#ifndef NDEBUG
-    assert(isMove && "PHI join instruction must be a move!");
-#else
-    isMove = isMove;
-#endif
-
-    LiveInterval &DstInt = getInterval(PHIDst);
-    LiveInterval &SrcInt = getInterval(PHISrc);
-    SmallVector<MachineInstr*, 16> IdentCopies;
-    SmallVector<MachineInstr*, 16> OtherCopies;
-    if (!isSafeAndProfitableToCoalesce(DstInt, SrcInt,
-                                       IdentCopies, OtherCopies))
-      continue;
-
-    DEBUG(errs() << "PHI Join: " << *Join);
-    assert(DstInt.containsOneValue() && "PHI join should have just one val#!");
-    assert(std::distance(mri_->use_begin(PHISrc), mri_->use_end()) == 1 &&
-           "PHI join src should not be used elsewhere");
-    VNInfo *VNI = DstInt.getValNumInfo(0);
-
-    // Change the non-identity copies to directly target the phi destination.
-    for (unsigned i = 0, e = OtherCopies.size(); i != e; ++i) {
-      MachineInstr *PHICopy = OtherCopies[i];
-      SlotIndex MIIndex = getInstructionIndex(PHICopy);
-      DEBUG(errs() << "Moving: " << MIIndex << ' ' << *PHICopy);
-      SlotIndex DefIndex = MIIndex.getDefIndex();
-      LiveRange *SLR = SrcInt.getLiveRangeContaining(DefIndex);
-      SlotIndex StartIndex = SLR->start;
-      SlotIndex EndIndex = SLR->end;
-
-      // Delete val# defined by the now identity copy and add the range from
-      // beginning of the mbb to the end of the range.
-      SrcInt.removeValNo(SLR->valno);
-      DEBUG(errs() << "  added range [" << StartIndex << ','
-            << EndIndex << "] to reg" << DstInt.reg << '\n');
-      assert (!DstInt.liveAt(StartIndex) && "Cannot coalesce when dst live!");
-      VNInfo *NewVNI = DstInt.getNextValue(DefIndex, PHICopy, true,
-                                           VNInfoAllocator);
-      NewVNI->setHasPHIKill(true);
-      DstInt.addRange(LiveRange(StartIndex, EndIndex, NewVNI));
-      for (unsigned j = 0, ee = PHICopy->getNumOperands(); j != ee; ++j) {
-        MachineOperand &MO = PHICopy->getOperand(j);
-        if (!MO.isReg() || MO.getReg() != PHISrc)
-          continue;
-        MO.setReg(PHIDst);
-      }
-    }
-
-    // Now let's eliminate all the would-be identity copies.
-    for (unsigned i = 0, e = IdentCopies.size(); i != e; ++i) {
-      MachineInstr *PHICopy = IdentCopies[i];
-      DEBUG(errs() << "Coalescing: " << *PHICopy);
-
-      SlotIndex MIIndex = getInstructionIndex(PHICopy);
-      SlotIndex DefIndex = MIIndex.getDefIndex();
-      LiveRange *SLR = SrcInt.getLiveRangeContaining(DefIndex);
-      SlotIndex StartIndex = SLR->start;
-      SlotIndex EndIndex = SLR->end;
-
-      // Delete val# defined by the now identity copy and add the range from
-      // beginning of the mbb to the end of the range.
-      SrcInt.removeValNo(SLR->valno);
-      RemoveMachineInstrFromMaps(PHICopy);
-      PHICopy->eraseFromParent();
-      DEBUG(errs() << "  added range [" << StartIndex << ','
-            << EndIndex << "] to reg" << DstInt.reg << '\n');
-      DstInt.addRange(LiveRange(StartIndex, EndIndex, VNI));
-    }
-
-    // Remove the phi join and update the phi block liveness.
-    SlotIndex MIIndex = getInstructionIndex(Join);
-    SlotIndex UseIndex = MIIndex.getUseIndex();
-    SlotIndex DefIndex = MIIndex.getDefIndex();
-    LiveRange *SLR = SrcInt.getLiveRangeContaining(UseIndex);
-    LiveRange *DLR = DstInt.getLiveRangeContaining(DefIndex);
-    DLR->valno->setCopy(0);
-    DLR->valno->setIsDefAccurate(false);
-    DstInt.addRange(LiveRange(SLR->start, SLR->end, DLR->valno));
-    SrcInt.removeRange(SLR->start, SLR->end);
-    assert(SrcInt.empty());
-    removeInterval(PHISrc);
-    RemoveMachineInstrFromMaps(Join);
-    Join->eraseFromParent();
-
-    ++numCoalescing;
-  }
-}
-
 /// computeIntervals - computes the live intervals for virtual
 /// registers. for some ordering of the machine instructions [1,N] a
 /// live interval is an interval [i, j) where 1 <= i <= j < N for
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
index be9f68f..a1c74c0 100644
--- a/lib/CodeGen/MachineVerifier.cpp
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -42,23 +42,18 @@
 using namespace llvm;
 
 namespace {
-  struct MachineVerifier : public MachineFunctionPass {
-    static char ID; // Pass ID, replacement for typeid
+  struct MachineVerifier {
 
-    MachineVerifier(bool allowDoubleDefs = false) :
-      MachineFunctionPass(&ID),
+    MachineVerifier(Pass *pass, bool allowDoubleDefs) :
+      PASS(pass),
       allowVirtDoubleDefs(allowDoubleDefs),
       allowPhysDoubleDefs(allowDoubleDefs),
       OutFileName(getenv("LLVM_VERIFY_MACHINEINSTRS"))
-        {}
-
-    void getAnalysisUsage(AnalysisUsage &AU) const {
-      AU.setPreservesAll();
-      MachineFunctionPass::getAnalysisUsage(AU);
-    }
+      {}
 
     bool runOnMachineFunction(MachineFunction &MF);
 
+    Pass *const PASS;
     const bool allowVirtDoubleDefs;
     const bool allowPhysDoubleDefs;
 
@@ -112,6 +107,10 @@ namespace {
       // regsKilled and regsLiveOut.
       RegSet vregsPassed;
 
+      // Vregs that must pass through MBB because they are needed by a successor
+      // block. This set is disjoint from regsLiveOut.
+      RegSet vregsRequired;
+
       BBInfo() : reachable(false) {}
 
       // Add register to vregsPassed if it belongs there. Return true if
@@ -133,6 +132,34 @@ namespace {
         return changed;
       }
 
+      // Record Reg in vregsRequired unless it is not a virtual register or is
+      // already in regsLiveOut. Return true only if Reg was newly inserted.
+      bool addRequired(unsigned Reg) {
+        if (!TargetRegisterInfo::isVirtualRegister(Reg))
+          return false;
+        if (regsLiveOut.count(Reg))
+          return false; // keep vregsRequired disjoint from regsLiveOut
+        return vregsRequired.insert(Reg).second;
+      }
+
+      // Apply addRequired to every register in RS; true if any was inserted.
+      bool addRequired(const RegSet &RS) {
+        bool changed = false;
+        for (RegSet::const_iterator I = RS.begin(), E = RS.end(); I != E; ++I)
+          if (addRequired(*I))
+            changed = true;
+        return changed;
+      }
+
+      // Same for the keys of a full map; the mapped values are ignored.
+      bool addRequired(const RegMap &RM) {
+        bool changed = false;
+        for (RegMap::const_iterator I = RM.begin(), E = RM.end(); I != E; ++I)
+          if (addRequired(I->first))
+            changed = true;
+        return changed;
+      }
+
       // Live-out registers are either in regsLiveOut or vregsPassed.
       bool isLiveOut(unsigned Reg) const {
         return regsLiveOut.count(Reg) || vregsPassed.count(Reg);
@@ -146,6 +173,9 @@ namespace {
       return Reg < regsReserved.size() && regsReserved.test(Reg);
     }
 
+    // Analysis information if available
+    LiveVariables *LiveVars;
+
     void visitMachineFunctionBefore();
     void visitMachineBasicBlockBefore(const MachineBasicBlock *MBB);
     void visitMachineInstrBefore(const MachineInstr *MI);
@@ -163,20 +193,44 @@ namespace {
     void calcMaxRegsPassed();
     void calcMinRegsPassed();
     void checkPHIOps(const MachineBasicBlock *MBB);
+
+    void calcRegsRequired();
+    void verifyLiveVariables();
+  };
+
+  struct MachineVerifierPass : public MachineFunctionPass { // runs MF.verify()
+    static char ID; // Pass ID, replacement for typeid
+    bool AllowDoubleDefs; // Passed through to MF.verify().
+
+    explicit MachineVerifierPass(bool allowDoubleDefs = false)
+      : MachineFunctionPass(&ID),
+        AllowDoubleDefs(allowDoubleDefs) {}
+    // Declares that this pass preserves all analyses.
+    void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesAll();
+      MachineFunctionPass::getAnalysisUsage(AU);
+    }
+    // Verifies MF via MachineFunction::verify(); always returns false.
+    bool runOnMachineFunction(MachineFunction &MF) {
+      MF.verify(this, AllowDoubleDefs);
+      return false;
+    }
   };
+
 }
 
-char MachineVerifier::ID = 0;
-static RegisterPass<MachineVerifier>
+char MachineVerifierPass::ID = 0;
+static RegisterPass<MachineVerifierPass>
 MachineVer("machineverifier", "Verify generated machine code");
 static const PassInfo *const MachineVerifyID = &MachineVer;
 
 FunctionPass *llvm::createMachineVerifierPass(bool allowPhysDoubleDefs) {
-  return new MachineVerifier(allowPhysDoubleDefs);
+  return new MachineVerifierPass(allowPhysDoubleDefs);
 }
 
-void MachineFunction::verify() const {
-  MachineVerifier().runOnMachineFunction(const_cast<MachineFunction&>(*this));
+void MachineFunction::verify(Pass *p, bool allowDoubleDefs) const {
+  MachineVerifier(p, allowDoubleDefs)
+    .runOnMachineFunction(const_cast<MachineFunction&>(*this));
 }
 
 bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) {
@@ -202,6 +256,12 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) {
   TRI = TM->getRegisterInfo();
   MRI = &MF.getRegInfo();
 
+  if (PASS) {
+    LiveVars = PASS->getAnalysisIfAvailable<LiveVariables>();
+  } else {
+    LiveVars = NULL;
+  }
+
   visitMachineFunctionBefore();
   for (MachineFunction::const_iterator MFI = MF.begin(), MFE = MF.end();
        MFI!=MFE; ++MFI) {
@@ -518,8 +578,9 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
     } else if (MO->isUse()) {
       regsLiveInButUnused.erase(Reg);
 
+      bool isKill = false;
       if (MO->isKill()) {
-        addRegWithSubRegs(regsKilled, Reg);
+        isKill = true;
         // Tied operands on two-address instuctions MUST NOT have a <kill> flag.
         if (MI->isRegTiedToDefOperand(MONum))
             report("Illegal kill flag on two-address instruction operand",
@@ -529,8 +590,20 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
         unsigned defIdx;
         if (MI->isRegTiedToDefOperand(MONum, &defIdx) &&
             MI->getOperand(defIdx).getReg() == Reg)
-          addRegWithSubRegs(regsKilled, Reg);
+          isKill = true;
+      }
+      if (isKill) {
+        addRegWithSubRegs(regsKilled, Reg);
+
+        // Check that LiveVars knows this kill
+        if (LiveVars && TargetRegisterInfo::isVirtualRegister(Reg)) {
+          LiveVariables::VarInfo &VI = LiveVars->getVarInfo(Reg);
+          if (std::find(VI.Kills.begin(),
+                        VI.Kills.end(), MI) == VI.Kills.end())
+            report("Kill missing from LiveVariables", MO, MONum);
+        }
       }
+
       // Use of a dead register.
       if (!regsLive.count(Reg)) {
         if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
@@ -734,6 +807,41 @@ void MachineVerifier::calcMinRegsPassed() {
   }
 }
 
+// Compute BBInfo::vregsRequired for every block: the virtual registers that
+// successors need live-in and that therefore must pass through the block.
+// This is the backwards counterpart of calcMaxRegsPassed.
+void MachineVerifier::calcRegsRequired() {
+  // Worklist of blocks whose vregsRequired grew and must be propagated again.
+  DenseSet<const MachineBasicBlock*> todo;
+
+  // Seed: each block's live-in vregs are required by all of its predecessors.
+  for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end();
+       MFI != MFE; ++MFI) {
+    const MachineBasicBlock *Succ = &*MFI;
+    BBInfo &SuccInfo = MBBInfoMap[Succ];
+    for (MachineBasicBlock::const_pred_iterator PrI = Succ->pred_begin(),
+           PrE = Succ->pred_end(); PrI != PrE; ++PrI) {
+      if (MBBInfoMap[*PrI].addRequired(SuccInfo.vregsLiveIn))
+        todo.insert(*PrI);
+    }
+  }
+
+  // Drain the worklist, pushing each block's vregsRequired on to its
+  // predecessors. The final state is independent of DenseSet iteration order.
+  while (!todo.empty()) {
+    const MachineBasicBlock *Cur = *todo.begin();
+    todo.erase(Cur);
+    BBInfo &CurInfo = MBBInfoMap[Cur];
+    for (MachineBasicBlock::const_pred_iterator PrI = Cur->pred_begin(),
+           PrE = Cur->pred_end(); PrI != PrE; ++PrI) {
+      const MachineBasicBlock *Pred = *PrI;
+      // Skip self-loops.
+      if (Pred != Cur && MBBInfoMap[Pred].addRequired(CurInfo.vregsRequired))
+        todo.insert(Pred);
+    }
+  }
+}
+
 // Check PHI instructions at the beginning of MBB. It is assumed that
 // calcMinRegsPassed has been run so BBInfo::isLiveOut is valid.
 void MachineVerifier::checkPHIOps(const MachineBasicBlock *MBB) {
@@ -849,4 +957,39 @@ void MachineVerifier::visitMachineFunctionAfter() {
       }
     }
   }
+
+  // Now check LiveVariables info if available
+  if (LiveVars) {
+    calcRegsRequired();
+    verifyLiveVariables();
+  }
 }
+
+// Cross-check BBInfo::vregsRequired against LiveVariables' AliveBlocks for
+// each visited virtual register and block, reporting every disagreement.
+void MachineVerifier::verifyLiveVariables() {
+  assert(LiveVars && "Don't call verifyLiveVariables without LiveVars");
+  // NOTE(review): loop stops before getLastVirtReg()-1 -- confirm the bound.
+  const unsigned RegE = MRI->getLastVirtReg()-1;
+  for (unsigned Reg = TargetRegisterInfo::FirstVirtualRegister;
+       Reg != RegE; ++Reg) {
+    LiveVariables::VarInfo &VI = LiveVars->getVarInfo(Reg);
+    for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end();
+         MFI != MFE; ++MFI) {
+      // Our vregsRequired should be identical to LiveVariables' AliveBlocks.
+      const bool Required = MBBInfoMap[MFI].vregsRequired.count(Reg);
+      const bool Alive = VI.AliveBlocks.test(MFI->getNumber());
+      if (Required && !Alive) {
+        report("LiveVariables: Block missing from AliveBlocks", MFI);
+        *OS << "Virtual register %reg" << Reg
+            << " must be live through the block.\n";
+      } else if (!Required && Alive) {
+        report("LiveVariables: Block should not be in AliveBlocks", MFI);
+        *OS << "Virtual register %reg" << Reg
+            << " is not needed live through the block.\n";
+      }
+    }
+  }
+}
+
+
diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp
index cd38dd1..b3802ed 100644
--- a/lib/CodeGen/PHIElimination.cpp
+++ b/lib/CodeGen/PHIElimination.cpp
@@ -21,7 +21,6 @@
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/RegAllocRegistry.h"
 #include "llvm/Function.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/ADT/SmallPtrSet.h"
@@ -37,37 +36,17 @@ using namespace llvm;
 STATISTIC(NumAtomic, "Number of atomic phis lowered");
 STATISTIC(NumSplits, "Number of critical edges split on demand");
 
-static cl::opt<bool>
-SplitEdges("split-phi-edges",
-           cl::desc("Split critical edges during phi elimination"),
-           cl::init(false), cl::Hidden);
-
 char PHIElimination::ID = 0;
 static RegisterPass<PHIElimination>
 X("phi-node-elimination", "Eliminate PHI nodes for register allocation");
 
 const PassInfo *const llvm::PHIEliminationID = &X;
 
-namespace llvm { FunctionPass *createLocalRegisterAllocator(); }
-
-// Should we run edge splitting?
-static bool shouldSplitEdges() {
-  // Edge splitting breaks the local register allocator. It cannot tolerate
-  // LiveVariables being run.
-  if (RegisterRegAlloc::getDefault() == createLocalRegisterAllocator)
-    return false;
-  return SplitEdges;
-}
-
 void llvm::PHIElimination::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.addPreserved<LiveVariables>();
   AU.addPreserved<MachineDominatorTree>();
-  if (shouldSplitEdges()) {
-    AU.addRequired<LiveVariables>();
-  } else {
-    AU.setPreservesCFG();
-    AU.addPreservedID(MachineLoopInfoID);
-  }
+  // rdar://7401784 This would be nice:
+  // AU.addPreservedID(MachineLoopInfoID);
   MachineFunctionPass::getAnalysisUsage(AU);
 }
 
@@ -79,9 +58,9 @@ bool llvm::PHIElimination::runOnMachineFunction(MachineFunction &Fn) {
   bool Changed = false;
 
   // Split critical edges to help the coalescer
-  if (shouldSplitEdges())
+  if (LiveVariables *LV = getAnalysisIfAvailable<LiveVariables>())
     for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I)
-      Changed |= SplitPHIEdges(Fn, *I);
+      Changed |= SplitPHIEdges(Fn, *I, *LV);
 
   // Populate VRegPHIUseCount
   analyzePHINodes(Fn);
@@ -361,10 +340,11 @@ void llvm::PHIElimination::analyzePHINodes(const MachineFunction& Fn) {
 }
 
 bool llvm::PHIElimination::SplitPHIEdges(MachineFunction &MF,
-                                         MachineBasicBlock &MBB) {
+                                         MachineBasicBlock &MBB,
+                                         LiveVariables &LV) {
   if (MBB.empty() || MBB.front().getOpcode() != TargetInstrInfo::PHI)
     return false;   // Quick exit for basic blocks without PHIs.
-  LiveVariables &LV = getAnalysis<LiveVariables>();
+
   for (MachineBasicBlock::const_iterator BBI = MBB.begin(), BBE = MBB.end();
        BBI != BBE && BBI->getOpcode() == TargetInstrInfo::PHI; ++BBI) {
     for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) {
diff --git a/lib/CodeGen/PHIElimination.h b/lib/CodeGen/PHIElimination.h
index 94716ee..f8c9fe7 100644
--- a/lib/CodeGen/PHIElimination.h
+++ b/lib/CodeGen/PHIElimination.h
@@ -90,7 +90,8 @@ namespace llvm {
     void analyzePHINodes(const MachineFunction& Fn);
 
     /// Split critical edges where necessary for good coalescer performance.
-    bool SplitPHIEdges(MachineFunction &MF, MachineBasicBlock &MBB);
+    bool SplitPHIEdges(MachineFunction &MF, MachineBasicBlock &MBB,
+                       LiveVariables &LV);
 
     /// isLiveOut - Determine if Reg is live out from MBB, when not
     /// considering PHI nodes. This means that Reg is either killed by
diff --git a/lib/CodeGen/RegAllocLinearScan.cpp b/lib/CodeGen/RegAllocLinearScan.cpp
index 6930abf..fff50da 100644
--- a/lib/CodeGen/RegAllocLinearScan.cpp
+++ b/lib/CodeGen/RegAllocLinearScan.cpp
@@ -59,11 +59,6 @@ PreSplitIntervals("pre-alloc-split",
                   cl::desc("Pre-register allocation live interval splitting"),
                   cl::init(false), cl::Hidden);
 
-static cl::opt<bool>
-NewSpillFramework("new-spill-framework",
-                  cl::desc("New spilling framework"),
-                  cl::init(false), cl::Hidden);
-
 static RegisterRegAlloc
 linearscanRegAlloc("linearscan", "linear scan register allocator",
                    createLinearScanRegisterAllocator);
@@ -441,9 +436,7 @@ bool RALinScan::runOnMachineFunction(MachineFunction &fn) {
   vrm_ = &getAnalysis<VirtRegMap>();
   if (!rewriter_.get()) rewriter_.reset(createVirtRegRewriter());
   
-  if (NewSpillFramework) {
-    spiller_.reset(createSpiller(mf_, li_, ls_, vrm_));
-  }
+  spiller_.reset(createSpiller(mf_, li_, ls_, loopInfo, vrm_));
   
   initIntervalSets();
 
@@ -1157,11 +1150,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
     SmallVector<LiveInterval*, 8> spillIs;
     std::vector<LiveInterval*> added;
     
-    if (!NewSpillFramework) {
-      added = li_->addIntervalsForSpills(*cur, spillIs, loopInfo, *vrm_);
-    } else {
-      added = spiller_->spill(cur); 
-    }
+    added = spiller_->spill(cur, spillIs); 
 
     std::sort(added.begin(), added.end(), LISorter());
     addStackInterval(cur, ls_, li_, mri_, *vrm_);
@@ -1241,11 +1230,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
          earliestStartInterval : sli;
        
     std::vector<LiveInterval*> newIs;
-    if (!NewSpillFramework) {
-      newIs = li_->addIntervalsForSpills(*sli, spillIs, loopInfo, *vrm_);
-    } else {
-      newIs = spiller_->spill(sli);
-    }
+    newIs = spiller_->spill(sli, spillIs);
     addStackInterval(sli, ls_, li_, mri_, *vrm_);
     std::copy(newIs.begin(), newIs.end(), std::back_inserter(added));
     spilled.insert(sli->reg);
diff --git a/lib/CodeGen/Spiller.cpp b/lib/CodeGen/Spiller.cpp
index 9107325..20c4a28 100644
--- a/lib/CodeGen/Spiller.cpp
+++ b/lib/CodeGen/Spiller.cpp
@@ -18,11 +18,25 @@
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
 
+namespace {
+  enum SpillerName { trivial, standard }; // Choices for the spiller option.
+}
+// Selects the implementation built by createSpiller; initially standard.
+static cl::opt<SpillerName>
+spillerOpt("spiller",
+           cl::desc("Spiller to use: (default: standard)"),
+           cl::Prefix,
+           cl::values(clEnumVal(trivial, "trivial spiller"),
+                      clEnumVal(standard, "default spiller"),
+                      clEnumValEnd),
+           cl::init(standard));
+
 Spiller::~Spiller() {}
 
 namespace {
@@ -49,153 +63,9 @@ protected:
     tii = mf->getTarget().getInstrInfo();
   }
 
-  /// Ensures there is space before the given machine instruction, returns the
-  /// instruction's new number.
-  SlotIndex makeSpaceBefore(MachineInstr *mi) {
-    //if (!lis->hasGapBeforeInstr(lis->getInstructionIndex(mi))) {
-      // FIXME: Should be updated to use rewrite-in-place methods when they're
-      // introduced. Currently broken.
-      //lis->scaleNumbering(2);
-      //ls->scaleNumbering(2);
-    //}
-
-    SlotIndex miIdx = lis->getInstructionIndex(mi);
-
-    //assert(lis->hasGapBeforeInstr(miIdx));
-    
-    return miIdx;
-  }
-
-  /// Ensure there is space after the given machine instruction, returns the
-  /// instruction's new number.
-  SlotIndex makeSpaceAfter(MachineInstr *mi) {
-    //if (!lis->hasGapAfterInstr(lis->getInstructionIndex(mi))) {
-      // FIXME: Should be updated to use rewrite-in-place methods when they're
-      // introduced. Currently broken.
-      // lis->scaleNumbering(2);
-      // ls->scaleNumbering(2);
-    //}
-
-    SlotIndex miIdx = lis->getInstructionIndex(mi);
-
-    //assert(lis->hasGapAfterInstr(miIdx));
-
-    return miIdx;
-  }  
-
-  /// Insert a store of the given vreg to the given stack slot immediately
-  /// after the given instruction. Returns the base index of the inserted
-  /// instruction. The caller is responsible for adding an appropriate
-  /// LiveInterval to the LiveIntervals analysis.
-  SlotIndex insertStoreAfter(MachineInstr *mi, unsigned ss,
-                                     unsigned vreg,
-                                     const TargetRegisterClass *trc) {
-
-    MachineBasicBlock::iterator nextInstItr(next(mi)); 
-
-    SlotIndex miIdx = makeSpaceAfter(mi);
-
-    tii->storeRegToStackSlot(*mi->getParent(), nextInstItr, vreg,
-                             true, ss, trc);
-    MachineBasicBlock::iterator storeInstItr(next(mi));
-    MachineInstr *storeInst = &*storeInstItr;
-    
-    return lis->InsertMachineInstrInMaps(storeInst);
-  }
-
-  /// Insert a store of the given vreg to the given stack slot immediately
-  /// before the given instructnion. Returns the base index of the inserted
-  /// Instruction.
-  SlotIndex insertStoreBefore(MachineInstr *mi, unsigned ss,
-                                      unsigned vreg,
-                                      const TargetRegisterClass *trc) {
-    SlotIndex miIdx = makeSpaceBefore(mi);
-  
-    tii->storeRegToStackSlot(*mi->getParent(), mi, vreg, true, ss, trc);
-    MachineBasicBlock::iterator storeInstItr(prior(mi));
-    MachineInstr *storeInst = &*storeInstItr;
-
-    return lis->InsertMachineInstrInMaps(storeInst);
-  }
-
-  void insertStoreAfterInstOnInterval(LiveInterval *li,
-                                      MachineInstr *mi, unsigned ss,
-                                      unsigned vreg,
-                                      const TargetRegisterClass *trc) {
-
-    SlotIndex storeInstIdx = insertStoreAfter(mi, ss, vreg, trc);
-    SlotIndex start = lis->getInstructionIndex(mi).getDefIndex(),
-              end = storeInstIdx.getUseIndex();
-
-    VNInfo *vni =
-      li->getNextValue(storeInstIdx, 0, true, lis->getVNInfoAllocator());
-    vni->addKill(storeInstIdx);
-    DEBUG(errs() << "    Inserting store range: [" << start
-                 << ", " << end << ")\n");
-    LiveRange lr(start, end, vni);
-      
-    li->addRange(lr);
-  }
-
-  /// Insert a load of the given vreg from the given stack slot immediately
-  /// after the given instruction. Returns the base index of the inserted
-  /// instruction. The caller is responsibel for adding/removing an appropriate
-  /// range vreg's LiveInterval.
-  SlotIndex insertLoadAfter(MachineInstr *mi, unsigned ss,
-                                    unsigned vreg,
-                                    const TargetRegisterClass *trc) {
-
-    MachineBasicBlock::iterator nextInstItr(next(mi)); 
-
-    SlotIndex miIdx = makeSpaceAfter(mi);
-
-    tii->loadRegFromStackSlot(*mi->getParent(), nextInstItr, vreg, ss, trc);
-    MachineBasicBlock::iterator loadInstItr(next(mi));
-    MachineInstr *loadInst = &*loadInstItr;
-    
-    return lis->InsertMachineInstrInMaps(loadInst);
-  }
-
-  /// Insert a load of the given vreg from the given stack slot immediately
-  /// before the given instruction. Returns the base index of the inserted
-  /// instruction. The caller is responsible for adding an appropriate
-  /// LiveInterval to the LiveIntervals analysis.
-  SlotIndex insertLoadBefore(MachineInstr *mi, unsigned ss,
-                                     unsigned vreg,
-                                     const TargetRegisterClass *trc) {  
-    SlotIndex miIdx = makeSpaceBefore(mi);
-  
-    tii->loadRegFromStackSlot(*mi->getParent(), mi, vreg, ss, trc);
-    MachineBasicBlock::iterator loadInstItr(prior(mi));
-    MachineInstr *loadInst = &*loadInstItr;
-
-    return lis->InsertMachineInstrInMaps(loadInst);
-  }
-
-  void insertLoadBeforeInstOnInterval(LiveInterval *li,
-                                      MachineInstr *mi, unsigned ss, 
-                                      unsigned vreg,
-                                      const TargetRegisterClass *trc) {
-
-    SlotIndex loadInstIdx = insertLoadBefore(mi, ss, vreg, trc);
-    SlotIndex start = loadInstIdx.getDefIndex(),
-              end = lis->getInstructionIndex(mi).getUseIndex();
-
-    VNInfo *vni =
-      li->getNextValue(loadInstIdx, 0, true, lis->getVNInfoAllocator());
-    vni->addKill(lis->getInstructionIndex(mi));
-    DEBUG(errs() << "    Intserting load range: [" << start
-                 << ", " << end << ")\n");
-    LiveRange lr(start, end, vni);
-
-    li->addRange(lr);
-  }
-
-
-
   /// Add spill ranges for every use/def of the live interval, inserting loads
-  /// immediately before each use, and stores after each def. No folding is
-  /// attempted.
+  /// immediately before each use, and stores after each def. No folding or
+  /// remat is attempted.
   std::vector<LiveInterval*> trivialSpillEverywhere(LiveInterval *li) {
     DEBUG(errs() << "Spilling everywhere " << *li << "\n");
 
@@ -212,56 +82,77 @@ protected:
     const TargetRegisterClass *trc = mri->getRegClass(li->reg);
     unsigned ss = vrm->assignVirt2StackSlot(li->reg);
 
+    // Iterate over reg uses/defs.
     for (MachineRegisterInfo::reg_iterator
          regItr = mri->reg_begin(li->reg); regItr != mri->reg_end();) {
 
+      // Grab the use/def instr.
       MachineInstr *mi = &*regItr;
 
       DEBUG(errs() << "  Processing " << *mi);
 
+      // Step regItr to the next use/def instr.
       do {
         ++regItr;
       } while (regItr != mri->reg_end() && (&*regItr == mi));
       
+      // Collect uses & defs for this instr.
       SmallVector<unsigned, 2> indices;
       bool hasUse = false;
       bool hasDef = false;
-    
       for (unsigned i = 0; i != mi->getNumOperands(); ++i) {
         MachineOperand &op = mi->getOperand(i);
-
         if (!op.isReg() || op.getReg() != li->reg)
           continue;
-      
         hasUse |= mi->getOperand(i).isUse();
         hasDef |= mi->getOperand(i).isDef();
-      
         indices.push_back(i);
       }
 
+      // Create a new vreg & interval for this instr.
       unsigned newVReg = mri->createVirtualRegister(trc);
       vrm->grow();
       vrm->assignVirt2StackSlot(newVReg, ss);
-
       LiveInterval *newLI = &lis->getOrCreateInterval(newVReg);
       newLI->weight = HUGE_VALF;
       
+      // Update the reg operands & kill flags.
       for (unsigned i = 0; i < indices.size(); ++i) {
-        mi->getOperand(indices[i]).setReg(newVReg);
-
-        if (mi->getOperand(indices[i]).isUse()) {
-          mi->getOperand(indices[i]).setIsKill(true);
+        unsigned mopIdx = indices[i];
+        MachineOperand &mop = mi->getOperand(mopIdx);
+        mop.setReg(newVReg);
+        if (mop.isUse() && !mi->isRegTiedToDefOperand(mopIdx)) {
+          mop.setIsKill(true);
         }
       }
-
       assert(hasUse || hasDef);
 
+      // Insert reload if necessary.
+      MachineBasicBlock::iterator miItr(mi);
       if (hasUse) {
-        insertLoadBeforeInstOnInterval(newLI, mi, ss, newVReg, trc);
+        tii->loadRegFromStackSlot(*mi->getParent(), miItr, newVReg, ss, trc);
+        MachineInstr *loadInstr(prior(miItr));
+        SlotIndex loadIndex =
+          lis->InsertMachineInstrInMaps(loadInstr).getDefIndex();
+        SlotIndex endIndex = loadIndex.getNextIndex();
+        VNInfo *loadVNI =
+          newLI->getNextValue(loadIndex, 0, true, lis->getVNInfoAllocator());
+        loadVNI->addKill(endIndex);
+        newLI->addRange(LiveRange(loadIndex, endIndex, loadVNI));
       }
 
+      // Insert store if necessary.
       if (hasDef) {
-        insertStoreAfterInstOnInterval(newLI, mi, ss, newVReg, trc);
+        tii->storeRegToStackSlot(*mi->getParent(), next(miItr), newVReg, true,
+                                 ss, trc);
+        MachineInstr *storeInstr(next(miItr));
+        SlotIndex storeIndex =
+          lis->InsertMachineInstrInMaps(storeInstr).getDefIndex();
+        SlotIndex beginIndex = storeIndex.getPrevIndex();
+        VNInfo *storeVNI =
+          newLI->getNextValue(beginIndex, 0, true, lis->getVNInfoAllocator());
+        storeVNI->addKill(storeIndex);
+        newLI->addRange(LiveRange(beginIndex, storeIndex, storeVNI));
       }
 
       added.push_back(newLI);
@@ -279,60 +170,32 @@ class TrivialSpiller : public SpillerBase {
 public:
 
   TrivialSpiller(MachineFunction *mf, LiveIntervals *lis, LiveStacks *ls,
-                 VirtRegMap *vrm) :
-    SpillerBase(mf, lis, ls, vrm) {}
+                 VirtRegMap *vrm)
+    : SpillerBase(mf, lis, ls, vrm) {}
 
-  std::vector<LiveInterval*> spill(LiveInterval *li) {
+  std::vector<LiveInterval*> spill(LiveInterval *li,
+                                   SmallVectorImpl<LiveInterval*> &spillIs) {
+    // Ignore spillIs - we don't use it.
     return trivialSpillEverywhere(li);
   }
 
-  std::vector<LiveInterval*> intraBlockSplit(LiveInterval *li, VNInfo *valno)  {
-    std::vector<LiveInterval*> spillIntervals;
-
-    if (!valno->isDefAccurate() && !valno->isPHIDef()) {
-      // Early out for values which have no well defined def point.
-      return spillIntervals;
-    }
-
-    // Ok.. we should be able to proceed...
-    const TargetRegisterClass *trc = mri->getRegClass(li->reg);
-    unsigned ss = vrm->assignVirt2StackSlot(li->reg);    
-    vrm->grow();
-    vrm->assignVirt2StackSlot(li->reg, ss);
-
-    MachineInstr *mi = 0;
-    SlotIndex storeIdx = SlotIndex();
-
-    if (valno->isDefAccurate()) {
-      // If we have an accurate def we can just grab an iterator to the instr
-      // after the def.
-      mi = lis->getInstructionFromIndex(valno->def);
-      storeIdx = insertStoreAfter(mi, ss, li->reg, trc).getDefIndex();
-    } else {
-      // if we get here we have a PHI def.
-      mi = &lis->getMBBFromIndex(valno->def)->front();
-      storeIdx = insertStoreBefore(mi, ss, li->reg, trc).getDefIndex();
-    }
-
-    MachineBasicBlock *defBlock = mi->getParent();
-    SlotIndex loadIdx = SlotIndex();
-
-    // Now we need to find the load...
-    MachineBasicBlock::iterator useItr(mi);
-    for (; !useItr->readsRegister(li->reg); ++useItr) {}
-
-    if (useItr != defBlock->end()) {
-      MachineInstr *loadInst = useItr;
-      loadIdx = insertLoadBefore(loadInst, ss, li->reg, trc).getUseIndex();
-    }
-    else {
-      MachineInstr *loadInst = &defBlock->back();
-      loadIdx = insertLoadAfter(loadInst, ss, li->reg, trc).getUseIndex();
-    }
-
-    li->removeRange(storeIdx, loadIdx, true);
+};
 
-    return spillIntervals;
+/// Spiller that delegates all work to LiveIntervals::addIntervalsForSpills.
+class StandardSpiller : public Spiller {
+private:
+  LiveIntervals *lis;
+  const MachineLoopInfo *loopInfo;
+  VirtRegMap *vrm;
+public:
+  StandardSpiller(MachineFunction *mf, LiveIntervals *lis, LiveStacks *ls,
+                  const MachineLoopInfo *loopInfo, VirtRegMap *vrm)
+    : lis(lis), loopInfo(loopInfo), vrm(vrm) {} // mf and ls are unused
+
+  /// Spill li by forwarding li, spillIs, loopInfo and vrm to the callee.
+  std::vector<LiveInterval*> spill(LiveInterval *li,
+                                   SmallVectorImpl<LiveInterval*> &spillIs) {
+    return lis->addIntervalsForSpills(*li, spillIs, loopInfo, *vrm);
   }
 
 };
@@ -340,6 +203,12 @@ public:
 }
 
 llvm::Spiller* llvm::createSpiller(MachineFunction *mf, LiveIntervals *lis,
-                                   LiveStacks *ls, VirtRegMap *vrm) {
-  return new TrivialSpiller(mf, lis, ls, vrm);
+                                   LiveStacks *ls,
+                                   const MachineLoopInfo *loopInfo,
+                                   VirtRegMap *vrm) {
+  switch (spillerOpt) {
+    case trivial: return new TrivialSpiller(mf, lis, ls, vrm); break;
+    case standard: return new StandardSpiller(mf, lis, ls, loopInfo, vrm); break;
+    default: llvm_unreachable("Unreachable!"); break;
+  }
 }
diff --git a/lib/CodeGen/Spiller.h b/lib/CodeGen/Spiller.h
index 9c3900d..7ec8e6d 100644
--- a/lib/CodeGen/Spiller.h
+++ b/lib/CodeGen/Spiller.h
@@ -10,6 +10,7 @@
 #ifndef LLVM_CODEGEN_SPILLER_H
 #define LLVM_CODEGEN_SPILLER_H
 
+#include "llvm/ADT/SmallVector.h"
 #include <vector>
 
 namespace llvm {
@@ -19,6 +20,7 @@ namespace llvm {
   class LiveStacks;
   class MachineFunction;
   class MachineInstr;
+  class MachineLoopInfo;
   class VirtRegMap;
   class VNInfo;
 
@@ -32,17 +34,15 @@ namespace llvm {
 
     /// Spill the given live range. The method used will depend on the Spiller
     /// implementation selected.
-    virtual std::vector<LiveInterval*> spill(LiveInterval *li) = 0;
-
-    /// Intra-block split.
-    virtual std::vector<LiveInterval*> intraBlockSplit(LiveInterval *li,
-                                                       VNInfo *valno) = 0;
+    virtual std::vector<LiveInterval*> spill(LiveInterval *li,
+                                   SmallVectorImpl<LiveInterval*> &spillIs) = 0;
 
   };
 
   /// Create and return a spiller object, as specified on the command line.
   Spiller* createSpiller(MachineFunction *mf, LiveIntervals *li,
-                         LiveStacks *ls, VirtRegMap *vrm);
+                         LiveStacks *ls, const MachineLoopInfo *loopInfo,
+                         VirtRegMap *vrm);
 }
 
 #endif
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp
index 84467ed..5fa690b 100644
--- a/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -112,8 +112,7 @@ namespace {
                                MachineBasicBlock *MBB, unsigned Dist);
     bool DeleteUnusedInstr(MachineBasicBlock::iterator &mi,
                            MachineBasicBlock::iterator &nmi,
-                           MachineFunction::iterator &mbbi,
-                           unsigned regB, unsigned regBIdx, unsigned Dist);
+                           MachineFunction::iterator &mbbi, unsigned Dist);
 
     bool TryInstructionTransform(MachineBasicBlock::iterator &mi,
                                  MachineBasicBlock::iterator &nmi,
@@ -730,7 +729,7 @@ void TwoAddressInstructionPass::ProcessCopy(MachineInstr *MI,
 
 /// isSafeToDelete - If the specified instruction does not produce any side
 /// effects and all of its defs are dead, then it's safe to delete.
-static bool isSafeToDelete(MachineInstr *MI, unsigned Reg,
+static bool isSafeToDelete(MachineInstr *MI,
                            const TargetInstrInfo *TII,
                            SmallVector<unsigned, 4> &Kills) {
   const TargetInstrDesc &TID = MI->getDesc();
@@ -745,10 +744,9 @@ static bool isSafeToDelete(MachineInstr *MI, unsigned Reg,
       continue;
     if (MO.isDef() && !MO.isDead())
       return false;
-    if (MO.isUse() && MO.getReg() != Reg && MO.isKill())
+    if (MO.isUse() && MO.isKill())
       Kills.push_back(MO.getReg());
   }
-
   return true;
 }
 
@@ -783,11 +781,10 @@ bool
 TwoAddressInstructionPass::DeleteUnusedInstr(MachineBasicBlock::iterator &mi,
                                              MachineBasicBlock::iterator &nmi,
                                              MachineFunction::iterator &mbbi,
-                                             unsigned regB, unsigned regBIdx,
                                              unsigned Dist) {
   // Check if the instruction has no side effects and if all its defs are dead.
   SmallVector<unsigned, 4> Kills;
-  if (!isSafeToDelete(mi, regB, TII, Kills))
+  if (!isSafeToDelete(mi, TII, Kills))
     return false;
 
   // If this instruction kills some virtual registers, we need to
@@ -810,10 +807,6 @@ TwoAddressInstructionPass::DeleteUnusedInstr(MachineBasicBlock::iterator &mi,
           LV->addVirtualRegisterKilled(Kill, NewKill);
       }
     }
-
-    // If regB was marked as a kill, update its Kills list.
-    if (mi->getOperand(regBIdx).isKill())
-      LV->removeVirtualRegisterKilled(regB, mi);
   }
 
   mbbi->erase(mi); // Nuke the old inst.
@@ -842,7 +835,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
   // it so it doesn't clobber regB.
   bool regBKilled = isKilled(*mi, regB, MRI, TII);
   if (!regBKilled && mi->getOperand(DstIdx).isDead() &&
-      DeleteUnusedInstr(mi, nmi, mbbi, regB, SrcIdx, Dist)) {
+      DeleteUnusedInstr(mi, nmi, mbbi, Dist)) {
     ++NumDeletes;
     return true; // Done with this instruction.
   }
diff --git a/lib/CodeGen/VirtRegRewriter.cpp b/lib/CodeGen/VirtRegRewriter.cpp
index ec0abd1..c836286 100644
--- a/lib/CodeGen/VirtRegRewriter.cpp
+++ b/lib/CodeGen/VirtRegRewriter.cpp
@@ -77,27 +77,38 @@ struct TrivialRewriter : public VirtRegRewriter {
     DEBUG(MF.dump());
 
     MachineRegisterInfo *mri = &MF.getRegInfo();
+    const TargetRegisterInfo *tri = MF.getTarget().getRegisterInfo();
 
     bool changed = false;
 
     for (LiveIntervals::iterator liItr = LIs->begin(), liEnd = LIs->end();
          liItr != liEnd; ++liItr) {
 
-      if (TargetRegisterInfo::isVirtualRegister(liItr->first)) {
-        if (VRM.hasPhys(liItr->first)) {
-          unsigned preg = VRM.getPhys(liItr->first);
-          mri->replaceRegWith(liItr->first, preg);
-          mri->setPhysRegUsed(preg);
-          changed = true;
-        }
+      const LiveInterval *li = liItr->second;
+      unsigned reg = li->reg;
+
+      if (TargetRegisterInfo::isPhysicalRegister(reg)) {
+        if (!li->empty())
+          mri->setPhysRegUsed(reg);
       }
       else {
-        if (!liItr->second->empty()) {
-          mri->setPhysRegUsed(liItr->first);
+        if (!VRM.hasPhys(reg))
+          continue;
+        unsigned pReg = VRM.getPhys(reg);
+        mri->setPhysRegUsed(pReg);
+        for (MachineRegisterInfo::reg_iterator regItr = mri->reg_begin(reg),
+             regEnd = mri->reg_end(); regItr != regEnd;) {
+          MachineOperand &mop = regItr.getOperand();
+          assert(mop.isReg() && mop.getReg() == reg && "reg_iterator broken?");
+          ++regItr;
+          unsigned subRegIdx = mop.getSubReg();
+          unsigned pRegOp = subRegIdx ? tri->getSubReg(pReg, subRegIdx) : pReg;
+          mop.setReg(pRegOp);
+          mop.setSubReg(0);
+          changed = true;
         }
       }
     }
-
     
     DEBUG(errs() << "**** Post Machine Instrs ****\n");
     DEBUG(MF.dump());
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index cb9bd6a..7033861 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -89,16 +89,18 @@ def : ProcNoItin<"xscale",          [ArchV5TE]>;
 def : ProcNoItin<"iwmmxt",          [ArchV5TE]>;
 
 // V6 Processors.
-def : ProcNoItin<"arm1136j-s",      [ArchV6]>;
-def : ProcNoItin<"arm1136jf-s",     [ArchV6, FeatureVFP2]>;
-def : ProcNoItin<"arm1176jz-s",     [ArchV6]>;
-def : ProcNoItin<"arm1176jzf-s",    [ArchV6, FeatureVFP2]>;
-def : ProcNoItin<"mpcorenovfp",     [ArchV6]>;
-def : ProcNoItin<"mpcore",          [ArchV6, FeatureVFP2]>;
+def : Processor<"arm1136j-s",       ARMV6Itineraries, [ArchV6]>;
+def : Processor<"arm1136jf-s",      ARMV6Itineraries, [ArchV6, FeatureVFP2]>;
+def : Processor<"arm1176jz-s",      ARMV6Itineraries, [ArchV6]>;
+def : Processor<"arm1176jzf-s",     ARMV6Itineraries, [ArchV6, FeatureVFP2]>;
+def : Processor<"mpcorenovfp",      ARMV6Itineraries, [ArchV6]>;
+def : Processor<"mpcore",           ARMV6Itineraries, [ArchV6, FeatureVFP2]>;
 
 // V6T2 Processors.
-def : ProcNoItin<"arm1156t2-s",     [ArchV6T2, FeatureThumb2]>;
-def : ProcNoItin<"arm1156t2f-s",    [ArchV6T2, FeatureThumb2, FeatureVFP2]>;
+def : Processor<"arm1156t2-s",     ARMV6Itineraries,
+                 [ArchV6T2, FeatureThumb2]>;
+def : Processor<"arm1156t2f-s",    ARMV6Itineraries,
+                 [ArchV6T2, FeatureThumb2, FeatureVFP2]>;
 
 // V7 Processors.
 def : Processor<"cortex-a8",        CortexA8Itineraries,
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 9be7454..696a8e1 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -1545,7 +1545,8 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
       InFlag = SDValue(ResNode, 1);
       ReplaceUses(SDValue(Op.getNode(), 1), InFlag);
     }
-    ReplaceUses(SDValue(Op.getNode(), 0), SDValue(Chain.getNode(), Chain.getResNo()));
+    ReplaceUses(SDValue(Op.getNode(), 0),
+                SDValue(Chain.getNode(), Chain.getResNo()));
     return NULL;
   }
   case ARMISD::CMOV: {
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 3fe634e..79bde29 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -608,11 +608,11 @@ def PICSTR  : AXI2stw<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p),
                [(store GPR:$src, addrmodepc:$addr)]>;
 
 def PICSTRH : AXI3sth<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p),
-               Pseudo, IIC_iStorer, "\n${addr:label}:\n\tstr${p}h\t$src, $addr",
+               Pseudo, IIC_iStorer, "\n${addr:label}:\n\tstrh${p}\t$src, $addr",
                [(truncstorei16 GPR:$src, addrmodepc:$addr)]>;
 
 def PICSTRB : AXI2stb<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p),
-               Pseudo, IIC_iStorer, "\n${addr:label}:\n\tstr${p}b\t$src, $addr",
+               Pseudo, IIC_iStorer, "\n${addr:label}:\n\tstrb${p}\t$src, $addr",
                [(truncstorei8 GPR:$src, addrmodepc:$addr)]>;
 }
 } // isNotDuplicable = 1
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index 2796364..d1831d1 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -66,6 +66,11 @@ def thumb_immshifted_shamt : SDNodeXForm<imm, [{
   return CurDAG->getTargetConstant(V, MVT::i32);
 }]>;
 
+// Scaled 4 immediate.
+def t_imm_s4 : Operand<i32> {
+  let PrintMethod = "printThumbS4ImmOperand";
+}
+
 // Define Thumb specific addressing modes.
 
 // t_addrmode_rr := reg + reg
@@ -134,20 +139,20 @@ def tPICADD : TIt<(outs GPR:$dst), (ins GPR:$lhs, pclabel:$cp), IIC_iALUr,
                  [(set GPR:$dst, (ARMpic_add GPR:$lhs, imm:$cp))]>;
 
 // PC relative add.
-def tADDrPCi : T1I<(outs tGPR:$dst), (ins i32imm:$rhs), IIC_iALUi,
-                  "add\t$dst, pc, $rhs * 4", []>;
+def tADDrPCi : T1I<(outs tGPR:$dst), (ins t_imm_s4:$rhs), IIC_iALUi,
+                  "add\t$dst, pc, $rhs", []>;
 
 // ADD rd, sp, #imm8
-def tADDrSPi : T1I<(outs tGPR:$dst), (ins GPR:$sp, i32imm:$rhs), IIC_iALUi,
-                  "add\t$dst, $sp, $rhs * 4", []>;
+def tADDrSPi : T1I<(outs tGPR:$dst), (ins GPR:$sp, t_imm_s4:$rhs), IIC_iALUi,
+                  "add\t$dst, $sp, $rhs", []>;
 
 // ADD sp, sp, #imm7
-def tADDspi : TIt<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), IIC_iALUi,
-                  "add\t$dst, $rhs * 4", []>;
+def tADDspi : TIt<(outs GPR:$dst), (ins GPR:$lhs, t_imm_s4:$rhs), IIC_iALUi,
+                  "add\t$dst, $rhs", []>;
 
 // SUB sp, sp, #imm7
-def tSUBspi : TIt<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), IIC_iALUi,
-                  "sub\t$dst, $rhs * 4", []>;
+def tSUBspi : TIt<(outs GPR:$dst), (ins GPR:$lhs, t_imm_s4:$rhs), IIC_iALUi,
+                  "sub\t$dst, $rhs", []>;
 
 // ADD rm, sp
 def tADDrSP : TIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr,
@@ -159,8 +164,8 @@ def tADDspr : TIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr,
 
 // Pseudo instruction that will expand into a tSUBspi + a copy.
 let usesCustomInserter = 1 in { // Expanded after instruction selection.
-def tSUBspi_ : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs),
-               NoItinerary, "@ sub\t$dst, $rhs * 4", []>;
+def tSUBspi_ : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, t_imm_s4:$rhs),
+               NoItinerary, "@ sub\t$dst, $rhs", []>;
 
 def tADDspr_ : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs),
                NoItinerary, "@ add\t$dst, $rhs", []>;
diff --git a/lib/Target/ARM/ARMScheduleV6.td b/lib/Target/ARM/ARMScheduleV6.td
index 1ace718..0fef466 100644
--- a/lib/Target/ARM/ARMScheduleV6.td
+++ b/lib/Target/ARM/ARMScheduleV6.td
@@ -11,4 +11,190 @@
 //
 //===----------------------------------------------------------------------===//
 
-// TODO: Add model for an ARM11
+// Model based on ARM1176
+//
+// Scheduling information derived from "ARM1176JZF-S Technical Reference Manual".
+//
+def ARMV6Itineraries : ProcessorItineraries<[
+  //
+  // No operand cycles
+  InstrItinData<IIC_iALUx    , [InstrStage<1, [FU_Pipe0]>]>,
+  //
+  // Binary Instructions that produce a result
+  InstrItinData<IIC_iALUi    , [InstrStage<1, [FU_Pipe0]>], [2, 2]>,
+  InstrItinData<IIC_iALUr    , [InstrStage<1, [FU_Pipe0]>], [2, 2, 2]>,
+  InstrItinData<IIC_iALUsi   , [InstrStage<1, [FU_Pipe0]>], [2, 2, 1]>,
+  InstrItinData<IIC_iALUsr   , [InstrStage<2, [FU_Pipe0]>], [3, 3, 2, 1]>,
+  //
+  // Unary Instructions that produce a result
+  InstrItinData<IIC_iUNAr    , [InstrStage<1, [FU_Pipe0]>], [2, 2]>,
+  InstrItinData<IIC_iUNAsi   , [InstrStage<1, [FU_Pipe0]>], [2, 1]>,
+  InstrItinData<IIC_iUNAsr   , [InstrStage<2, [FU_Pipe0]>], [3, 2, 1]>,
+  //
+  // Compare instructions
+  InstrItinData<IIC_iCMPi    , [InstrStage<1, [FU_Pipe0]>], [2]>,
+  InstrItinData<IIC_iCMPr    , [InstrStage<1, [FU_Pipe0]>], [2, 2]>,
+  InstrItinData<IIC_iCMPsi   , [InstrStage<1, [FU_Pipe0]>], [2, 1]>,
+  InstrItinData<IIC_iCMPsr   , [InstrStage<2, [FU_Pipe0]>], [3, 2, 1]>,
+  //
+  // Move instructions, unconditional
+  InstrItinData<IIC_iMOVi    , [InstrStage<1, [FU_Pipe0]>], [2]>,
+  InstrItinData<IIC_iMOVr    , [InstrStage<1, [FU_Pipe0]>], [2, 2]>,
+  InstrItinData<IIC_iMOVsi   , [InstrStage<1, [FU_Pipe0]>], [2, 1]>,
+  InstrItinData<IIC_iMOVsr   , [InstrStage<2, [FU_Pipe0]>], [3, 2, 1]>,
+  //
+  // Move instructions, conditional
+  InstrItinData<IIC_iCMOVi   , [InstrStage<1, [FU_Pipe0]>], [3]>,
+  InstrItinData<IIC_iCMOVr   , [InstrStage<1, [FU_Pipe0]>], [3, 2]>,
+  InstrItinData<IIC_iCMOVsi  , [InstrStage<1, [FU_Pipe0]>], [3, 1]>,
+  InstrItinData<IIC_iCMOVsr  , [InstrStage<1, [FU_Pipe0]>], [4, 2, 1]>,
+
+  // Integer multiply pipeline
+  //
+  InstrItinData<IIC_iMUL16   , [InstrStage<1, [FU_Pipe0]>], [4, 1, 1]>,
+  InstrItinData<IIC_iMAC16   , [InstrStage<1, [FU_Pipe0]>], [4, 1, 1, 2]>,
+  InstrItinData<IIC_iMUL32   , [InstrStage<2, [FU_Pipe0]>], [5, 1, 1]>,
+  InstrItinData<IIC_iMAC32   , [InstrStage<2, [FU_Pipe0]>], [5, 1, 1, 2]>,
+  InstrItinData<IIC_iMUL64   , [InstrStage<3, [FU_Pipe0]>], [6, 1, 1]>,
+  InstrItinData<IIC_iMAC64   , [InstrStage<3, [FU_Pipe0]>], [6, 1, 1, 2]>,
+  
+  // Integer load pipeline
+  //
+  // Immediate offset
+  InstrItinData<IIC_iLoadi   , [InstrStage<1, [FU_Pipe0]>], [4, 1]>,
+  //
+  // Register offset
+  InstrItinData<IIC_iLoadr   , [InstrStage<1, [FU_Pipe0]>], [4, 1, 1]>,
+  //
+  // Scaled register offset, issues over 2 cycles
+  InstrItinData<IIC_iLoadsi  , [InstrStage<2, [FU_Pipe0]>], [5, 2, 1]>,
+  //
+  // Immediate offset with update
+  InstrItinData<IIC_iLoadiu  , [InstrStage<1, [FU_Pipe0]>], [4, 2, 1]>,
+  //
+  // Register offset with update
+  InstrItinData<IIC_iLoadru  , [InstrStage<1, [FU_Pipe0]>], [4, 2, 1, 1]>,
+  //
+  // Scaled register offset with update, issues over 2 cycles
+  InstrItinData<IIC_iLoadsiu , [InstrStage<2, [FU_Pipe0]>], [5, 2, 2, 1]>,
+
+  //
+  // Load multiple
+  InstrItinData<IIC_iLoadm   , [InstrStage<3, [FU_Pipe0]>]>,
+
+  // Integer store pipeline
+  //
+  // Immediate offset
+  InstrItinData<IIC_iStorei  , [InstrStage<1, [FU_Pipe0]>], [2, 1]>,
+  //
+  // Register offset
+  InstrItinData<IIC_iStorer  , [InstrStage<1, [FU_Pipe0]>], [2, 1, 1]>,
+
+  //
+  // Scaled register offset, issues over 2 cycles
+  InstrItinData<IIC_iStoresi , [InstrStage<2, [FU_Pipe0]>], [2, 2, 1]>,
+  //
+  // Immediate offset with update
+  InstrItinData<IIC_iStoreiu , [InstrStage<1, [FU_Pipe0]>], [2, 2, 1]>,
+  //
+  // Register offset with update
+  InstrItinData<IIC_iStoreru , [InstrStage<1, [FU_Pipe0]>], [2, 2, 1, 1]>,
+  //
+  // Scaled register offset with update, issues over 2 cycles
+  InstrItinData<IIC_iStoresiu, [InstrStage<2, [FU_Pipe0]>], [2, 2, 2, 1]>,
+  //
+  // Store multiple
+  InstrItinData<IIC_iStorem   , [InstrStage<3, [FU_Pipe0]>]>,
+  
+  // Branch
+  //
+  // no delay slots, so the latency of a branch is unimportant
+  InstrItinData<IIC_Br      , [InstrStage<1, [FU_Pipe0]>]>,
+
+  // VFP
+  // Issue through integer pipeline, and execute in NEON unit. We assume
+  // RunFast mode so that NFP pipeline is used for single-precision when
+  // possible.
+  //
+  // FP Special Register to Integer Register File Move
+  InstrItinData<IIC_fpSTAT , [InstrStage<1, [FU_Pipe0]>], [3]>,
+  //
+  // Single-precision FP Unary
+  InstrItinData<IIC_fpUNA32 , [InstrStage<1, [FU_Pipe0]>], [5, 2]>,
+  //
+  // Double-precision FP Unary
+  InstrItinData<IIC_fpUNA64 , [InstrStage<1, [FU_Pipe0]>], [5, 2]>,
+  //
+  // Single-precision FP Compare
+  InstrItinData<IIC_fpCMP32 , [InstrStage<1, [FU_Pipe0]>], [2, 2]>,
+  //
+  // Double-precision FP Compare
+  InstrItinData<IIC_fpCMP64 , [InstrStage<1, [FU_Pipe0]>], [2, 2]>,
+  //
+  // Single to Double FP Convert
+  InstrItinData<IIC_fpCVTSD , [InstrStage<1, [FU_Pipe0]>], [5, 2]>,
+  //
+  // Double to Single FP Convert
+  InstrItinData<IIC_fpCVTDS , [InstrStage<1, [FU_Pipe0]>], [5, 2]>,
+  //
+  // Single-Precision FP to Integer Convert
+  InstrItinData<IIC_fpCVTSI , [InstrStage<1, [FU_Pipe0]>], [9, 2]>,
+  //
+  // Double-Precision FP to Integer Convert
+  InstrItinData<IIC_fpCVTDI , [InstrStage<1, [FU_Pipe0]>], [9, 2]>,
+  //
+  // Integer to Single-Precision FP Convert
+  InstrItinData<IIC_fpCVTIS , [InstrStage<1, [FU_Pipe0]>], [9, 2]>,
+  //
+  // Integer to Double-Precision FP Convert
+  InstrItinData<IIC_fpCVTID , [InstrStage<1, [FU_Pipe0]>], [9, 2]>,
+  //
+  // Single-precision FP ALU
+  InstrItinData<IIC_fpALU32 , [InstrStage<1, [FU_Pipe0]>], [9, 2, 2]>,
+  //
+  // Double-precision FP ALU
+  InstrItinData<IIC_fpALU64 , [InstrStage<1, [FU_Pipe0]>], [9, 2, 2]>,
+  //
+  // Single-precision FP Multiply
+  InstrItinData<IIC_fpMUL32 , [InstrStage<1, [FU_Pipe0]>], [9, 2, 2]>,
+  //
+  // Double-precision FP Multiply
+  InstrItinData<IIC_fpMUL64 , [InstrStage<2, [FU_Pipe0]>], [9, 2, 2]>,
+  //
+  // Single-precision FP MAC
+  InstrItinData<IIC_fpMAC32 , [InstrStage<1, [FU_Pipe0]>], [9, 2, 2, 2]>,
+  //
+  // Double-precision FP MAC
+  InstrItinData<IIC_fpMAC64 , [InstrStage<2, [FU_Pipe0]>], [9, 2, 2, 2]>,
+  //
+  // Single-precision FP DIV
+  InstrItinData<IIC_fpDIV32 , [InstrStage<15, [FU_Pipe0]>], [20, 2, 2]>,
+  //
+  // Double-precision FP DIV
+  InstrItinData<IIC_fpDIV64 , [InstrStage<29, [FU_Pipe0]>], [34, 2, 2]>,
+  //
+  // Single-precision FP SQRT
+  InstrItinData<IIC_fpSQRT32 , [InstrStage<15, [FU_Pipe0]>], [20, 2, 2]>,
+  //
+  // Double-precision FP SQRT
+  InstrItinData<IIC_fpSQRT64 , [InstrStage<29, [FU_Pipe0]>], [34, 2, 2]>,
+  //
+  // Single-precision FP Load
+  InstrItinData<IIC_fpLoad32 , [InstrStage<1, [FU_Pipe0]>], [5, 2, 2]>,
+  //
+  // Double-precision FP Load
+  InstrItinData<IIC_fpLoad64 , [InstrStage<1, [FU_Pipe0]>], [5, 2, 2]>,
+  //
+  // FP Load Multiple
+  InstrItinData<IIC_fpLoadm , [InstrStage<3, [FU_Pipe0]>]>,
+  //
+  // Single-precision FP Store
+  InstrItinData<IIC_fpStore32 , [InstrStage<1, [FU_Pipe0]>], [2, 2, 2]>,
+  //
+  // Double-precision FP Store
+  // Modeled as one FU_Pipe0 stage; no separate FU_Issue unit is used here.
+  InstrItinData<IIC_fpStore64 , [InstrStage<1, [FU_Pipe0]>], [2, 2, 2]>,
+  //
+  // FP Store Multiple
+  InstrItinData<IIC_fpStorem , [InstrStage<3, [FU_Pipe0]>]>
+]>;
diff --git a/lib/Target/ARM/ARMScheduleV7.td b/lib/Target/ARM/ARMScheduleV7.td
index e565813..427645c 100644
--- a/lib/Target/ARM/ARMScheduleV7.td
+++ b/lib/Target/ARM/ARMScheduleV7.td
@@ -184,7 +184,7 @@ def CortexA8Itineraries : ProcessorItineraries<[
   //
   // Single-precision FP Compare
   InstrItinData<IIC_fpCMP32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_NPipe]>], [7, 1]>,
+                               InstrStage<1, [FU_NPipe]>], [1, 1]>,
   //
   // Double-precision FP Compare
   InstrItinData<IIC_fpCMP64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
@@ -221,7 +221,7 @@ def CortexA8Itineraries : ProcessorItineraries<[
   //
   // Single-precision FP ALU
   InstrItinData<IIC_fpALU32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_NPipe]>], [7, 1]>,
+                               InstrStage<1, [FU_NPipe]>], [7, 1, 1]>,
   //
   // Double-precision FP ALU
   InstrItinData<IIC_fpALU64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
@@ -230,7 +230,7 @@ def CortexA8Itineraries : ProcessorItineraries<[
   //
   // Single-precision FP Multiply
   InstrItinData<IIC_fpMUL32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_NPipe]>], [7, 1]>,
+                               InstrStage<1, [FU_NPipe]>], [7, 1, 1]>,
   //
   // Double-precision FP Multiply
   InstrItinData<IIC_fpMUL64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
@@ -239,7 +239,7 @@ def CortexA8Itineraries : ProcessorItineraries<[
   //
   // Single-precision FP MAC
   InstrItinData<IIC_fpMAC32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_NPipe]>], [7, 1]>,
+                               InstrStage<1, [FU_NPipe]>], [7, 2, 1, 1]>,
   //
   // Double-precision FP MAC
   InstrItinData<IIC_fpMAC64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
index 0352503..dd4a240 100644
--- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
@@ -110,6 +110,7 @@ namespace {
                                 const char *Modifier = 0);
     void printBitfieldInvMaskImmOperand (const MachineInstr *MI, int OpNum);
 
+    void printThumbS4ImmOperand(const MachineInstr *MI, int OpNum);
     void printThumbITMask(const MachineInstr *MI, int OpNum);
     void printThumbAddrModeRROperand(const MachineInstr *MI, int OpNum);
     void printThumbAddrModeRI5Operand(const MachineInstr *MI, int OpNum,
@@ -674,6 +675,10 @@ ARMAsmPrinter::printBitfieldInvMaskImmOperand(const MachineInstr *MI, int Op) {
 
 //===--------------------------------------------------------------------===//
 
+void ARMAsmPrinter::printThumbS4ImmOperand(const MachineInstr *MI, int Op) {
+  O << "#" <<  MI->getOperand(Op).getImm() * 4;
+}
+
 void
 ARMAsmPrinter::printThumbITMask(const MachineInstr *MI, int Op) {
   // (3 - the number of trailing zeros) is the number of then / else.
@@ -713,7 +718,7 @@ ARMAsmPrinter::printThumbAddrModeRI5Operand(const MachineInstr *MI, int Op,
   if (MO3.getReg())
     O << ", " << getRegisterName(MO3.getReg());
   else if (unsigned ImmOffs = MO2.getImm())
-    O << ", #" << ImmOffs * Scale;
+    O << ", #+" << ImmOffs * Scale;
   O << "]";
 }
 
@@ -735,7 +740,7 @@ void ARMAsmPrinter::printThumbAddrModeSPOperand(const MachineInstr *MI,int Op) {
   const MachineOperand &MO2 = MI->getOperand(Op+1);
   O << "[" << getRegisterName(MO1.getReg());
   if (unsigned ImmOffs = MO2.getImm())
-    O << ", #" << ImmOffs << " * 4";
+    O << ", #+" << ImmOffs*4;
   O << "]";
 }
 
@@ -801,9 +806,9 @@ void ARMAsmPrinter::printT2AddrModeImm8s4Operand(const MachineInstr *MI,
   int32_t OffImm = (int32_t)MO2.getImm() / 4;
   // Don't print +0.
   if (OffImm < 0)
-    O << ", #-" << -OffImm << " * 4";
+    O << ", #-" << -OffImm * 4;
   else if (OffImm > 0)
-    O << ", #+" << OffImm << " * 4";
+    O << ", #+" << OffImm * 4;
   O << "]";
 }
 
diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp
index 0047925..9fc57e0 100644
--- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp
+++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp
@@ -351,3 +351,8 @@ void ARMInstPrinter::printPCLabel(const MCInst *MI, unsigned OpNum) {
   // FIXME: remove this.
   abort();
 }
+
+void ARMInstPrinter::printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum) {
+  // FIXME: remove this.
+  abort();
+}
diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h
index 9e7f8d5..23a7f05 100644
--- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h
+++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h
@@ -52,7 +52,8 @@ public:
                               const char *Modifier = 0);
     
   void printBitfieldInvMaskImmOperand(const MCInst *MI, unsigned OpNum);
-  
+
+  void printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum);
   void printThumbITMask(const MCInst *MI, unsigned OpNum) {}
   void printThumbAddrModeRROperand(const MCInst *MI, unsigned OpNum) {}
   void printThumbAddrModeRI5Operand(const MCInst *MI, unsigned OpNum,
diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp
index 9ce30aa..ad1739c 100644
--- a/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -105,7 +105,7 @@ namespace {
 
     // FIXME: Clean this up after splitting each Thumb load / store opcode
     // into multiple ones.
-    { ARM::t2LDRi12,ARM::tLDR,    0,             5,   0,    1,   0,  0,0, 1 },
+    { ARM::t2LDRi12,ARM::tLDR,    ARM::tLDRspi,  5,   8,    1,   0,  0,0, 1 },
     { ARM::t2LDRs,  ARM::tLDR,    0,             0,   0,    1,   0,  0,0, 1 },
     { ARM::t2LDRBi12,ARM::tLDRB,  0,             5,   0,    1,   0,  0,0, 1 },
     { ARM::t2LDRBs, ARM::tLDRB,   0,             0,   0,    1,   0,  0,0, 1 },
@@ -113,7 +113,7 @@ namespace {
     { ARM::t2LDRHs, ARM::tLDRH,   0,             0,   0,    1,   0,  0,0, 1 },
     { ARM::t2LDRSBs,ARM::tLDRSB,  0,             0,   0,    1,   0,  0,0, 1 },
     { ARM::t2LDRSHs,ARM::tLDRSH,  0,             0,   0,    1,   0,  0,0, 1 },
-    { ARM::t2STRi12,ARM::tSTR,    0,             5,   0,    1,   0,  0,0, 1 },
+    { ARM::t2STRi12,ARM::tSTR,    ARM::tSTRspi,  5,   8,    1,   0,  0,0, 1 },
     { ARM::t2STRs,  ARM::tSTR,    0,             0,   0,    1,   0,  0,0, 1 },
     { ARM::t2STRBi12,ARM::tSTRB,  0,             5,   0,    1,   0,  0,0, 1 },
     { ARM::t2STRBs, ARM::tSTRB,   0,             0,   0,    1,   0,  0,0, 1 },
@@ -244,8 +244,13 @@ static bool VerifyLowRegs(MachineInstr *MI) {
       continue;
     if (isLROk && Reg == ARM::LR)
       continue;
-    if (isSPOk && Reg == ARM::SP)
-      continue;
+    if (Reg == ARM::SP) {
+      if (isSPOk)
+        continue;
+      if (i == 1 && (Opc == ARM::t2LDRi12 || Opc == ARM::t2STRi12))
+        // Special case for these ldr / str with sp as base register.
+        continue;
+    }
     if (!isARMLowRegister(Reg))
       return false;
   }
@@ -261,17 +266,26 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
   unsigned Scale = 1;
   bool HasImmOffset = false;
   bool HasShift = false;
+  bool HasOffReg = true;
   bool isLdStMul = false;
   unsigned Opc = Entry.NarrowOpc1;
   unsigned OpNum = 3; // First 'rest' of operands.
+  uint8_t  ImmLimit = Entry.Imm1Limit;
   switch (Entry.WideOpc) {
   default:
     llvm_unreachable("Unexpected Thumb2 load / store opcode!");
   case ARM::t2LDRi12:
-  case ARM::t2STRi12:
+  case ARM::t2STRi12: {
+    unsigned BaseReg = MI->getOperand(1).getReg();
+    if (BaseReg == ARM::SP) {
+      Opc = Entry.NarrowOpc2;
+      ImmLimit = Entry.Imm2Limit;
+      HasOffReg = false;
+    }
     Scale = 4;
     HasImmOffset = true;
     break;
+  }
   case ARM::t2LDRBi12:
   case ARM::t2STRBi12:
     HasImmOffset = true;
@@ -325,7 +339,7 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
   unsigned OffsetImm = 0;
   if (HasImmOffset) {
     OffsetImm = MI->getOperand(2).getImm();
-    unsigned MaxOffset = ((1 << Entry.Imm1Limit) - 1) * Scale;
+    unsigned MaxOffset = ((1 << ImmLimit) - 1) * Scale;
     if ((OffsetImm & (Scale-1)) || OffsetImm > MaxOffset)
       // Make sure the immediate field fits.
       return false;
@@ -337,7 +351,7 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
   MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, TII->get(Opc));
   if (!isLdStMul) {
     MIB.addOperand(MI->getOperand(0)).addOperand(MI->getOperand(1));
-    if (Entry.NarrowOpc1 != ARM::tLDRSB && Entry.NarrowOpc1 != ARM::tLDRSH) {
+    if (Opc != ARM::tLDRSB && Opc != ARM::tLDRSH) {
       // tLDRSB and tLDRSH do not have an immediate offset field. On the other
       // hand, it must have an offset register.
       // FIXME: Remove this special case.
@@ -345,13 +359,17 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
     }
     assert((!HasShift || OffsetReg) && "Invalid so_reg load / store address!");
 
-    MIB.addReg(OffsetReg, getKillRegState(OffsetKill));
+    if (HasOffReg)
+      MIB.addReg(OffsetReg, getKillRegState(OffsetKill));
   }
 
   // Transfer the rest of operands.
   for (unsigned e = MI->getNumOperands(); OpNum != e; ++OpNum)
     MIB.addOperand(MI->getOperand(OpNum));
 
+  // Transfer memoperands.
+  (*MIB).setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+
   DEBUG(errs() << "Converted 32-bit: " << *MI << "       to 16-bit: " << *MIB);
 
   MBB.erase(MI);
diff --git a/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp b/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp
index 4898fae..efd3fb7 100644
--- a/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp
+++ b/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp
@@ -365,6 +365,8 @@ void MipsAsmPrinter::printOperand(const MachineInstr *MI, int opNum) {
     case MachineOperand::MO_ConstantPoolIndex:
       O << MAI->getPrivateGlobalPrefix() << "CPI"
         << getFunctionNumber() << "_" << MO.getIndex();
+      if (MO.getOffset())
+        O << "+" << MO.getOffset();
       break;
   
     default:
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp
index cbcedb8..2990ba9 100644
--- a/lib/Target/Mips/MipsISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp
@@ -90,6 +90,8 @@ private:
   bool SelectAddr(SDValue Op, SDValue N, 
                   SDValue &Base, SDValue &Offset);
 
+  SDNode *SelectLoadFp64(SDValue N);
+  SDNode *SelectStoreFp64(SDValue N);
 
   // getI32Imm - Return a target constant with the specified
   // value, of type i32.
@@ -198,6 +200,121 @@ SelectAddr(SDValue Op, SDValue Addr, SDValue &Offset, SDValue &Base)
   return true;
 }
 
+/// SelectLoadFp64 - On Mips1, split an unindexed f64 load into two LWC1 loads
+/// of the even/odd f32 subregisters; returns NULL when it does not apply.
+SDNode *MipsDAGToDAGISel::SelectLoadFp64(SDValue N) {
+  MVT::SimpleValueType NVT =
+    N.getNode()->getValueType(0).getSimpleVT().SimpleTy;
+
+  if (!Subtarget.isMips1() || NVT != MVT::f64)
+    return NULL;
+
+  if (!Predicate_unindexedload(N.getNode()) || !Predicate_load(N.getNode()))
+    return NULL;
+
+  SDValue Chain = N.getOperand(0);
+  SDValue N1 = N.getOperand(1);
+  SDValue Offset0, Offset1, Base;
+
+  if (!SelectAddr(N, N1, Offset0, Base) || N1.getValueType() != MVT::i32)
+    return NULL;
+
+  MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
+  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
+  DebugLoc dl = N.getDebugLoc();
+
+  // The second load starts 4 bytes after the first.
+  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Offset0))
+    Offset1 = CurDAG->getTargetConstant(C->getSExtValue()+4, MVT::i32);
+  else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Offset0))
+    Offset1 = CurDAG->getTargetConstantPool(CP->getConstVal(),
+                                            MVT::i32,
+                                            CP->getAlignment(),
+                                            CP->getOffset()+4,
+                                            CP->getTargetFlags());
+  else
+    return NULL;
+
+  // Instead of:
+  //    ldc $f0, X($3)
+  // Generate:
+  //    lwc $f0, X($3)
+  //    lwc $f1, X+4($3)
+  SDNode *LD0 = CurDAG->getMachineNode(Mips::LWC1, dl, MVT::f32,
+                                    MVT::Other, Offset0, Base, Chain);
+  SDValue Undef = SDValue(CurDAG->getMachineNode(TargetInstrInfo::IMPLICIT_DEF,
+                                                 dl, NVT), 0);
+  SDValue I0 = CurDAG->getTargetInsertSubreg(Mips::SUBREG_FPEVEN, dl,
+                            MVT::f64, Undef, SDValue(LD0, 0));
+
+  SDNode *LD1 = CurDAG->getMachineNode(Mips::LWC1, dl, MVT::f32,
+                          MVT::Other, Offset1, Base, SDValue(LD0, 1));
+  SDValue I1 = CurDAG->getTargetInsertSubreg(Mips::SUBREG_FPODD, dl,
+                            MVT::f64, I0, SDValue(LD1, 0));
+
+  ReplaceUses(N, I1);
+  ReplaceUses(N.getValue(1), SDValue(LD1, 1)); // chain out of the last load
+  cast<MachineSDNode>(LD0)->setMemRefs(MemRefs0, MemRefs0 + 1);
+  cast<MachineSDNode>(LD1)->setMemRefs(MemRefs0, MemRefs0 + 1);
+  return I1.getNode();
+}
+
+SDNode *MipsDAGToDAGISel::SelectStoreFp64(SDValue N) {
+  // On Mips1, split an unindexed f64 store into two SWC1 stores; else NULL.
+  if (!Subtarget.isMips1() || 
+      N.getOperand(1).getValueType() != MVT::f64)
+    return NULL;
+
+  SDValue Chain = N.getOperand(0);
+
+  if (!Predicate_unindexedstore(N.getNode()) ||
+      !Predicate_store(N.getNode()))
+    return NULL;
+
+  SDValue N1 = N.getOperand(1);
+  SDValue N2 = N.getOperand(2);
+  SDValue Offset0, Offset1, Base;
+
+  if (!SelectAddr(N, N2, Offset0, Base) ||
+      N1.getValueType() != MVT::f64 ||
+      N2.getValueType() != MVT::i32)
+    return NULL;
+
+  MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
+  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
+  DebugLoc dl = N.getDebugLoc();
+
+  // Get the even and odd part from the f64 register
+  SDValue FPOdd = CurDAG->getTargetExtractSubreg(Mips::SUBREG_FPODD, 
+                                                 dl, MVT::f32, N1);
+  SDValue FPEven = CurDAG->getTargetExtractSubreg(Mips::SUBREG_FPEVEN,
+                                                 dl, MVT::f32, N1);
+
+  // The second store starts 4 bytes after the first.
+  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Offset0))
+    Offset1 = CurDAG->getTargetConstant(C->getSExtValue()+4, MVT::i32);
+  else
+    return NULL;
+
+  // Instead of:
+  //    sdc $f0, X($3)
+  // Generate:
+  //    swc $f0, X($3)
+  //    swc $f1, X+4($3)
+  SDValue Ops0[] = { FPEven, Offset0, Base, Chain };
+  Chain = SDValue(CurDAG->getMachineNode(Mips::SWC1, dl,
+                                       MVT::Other, Ops0, 4), 0);
+  cast<MachineSDNode>(Chain.getNode())->setMemRefs(MemRefs0, MemRefs0 + 1);
+
+  SDValue Ops1[] = { FPOdd, Offset1, Base, Chain };
+  Chain = SDValue(CurDAG->getMachineNode(Mips::SWC1, dl,
+                                       MVT::Other, Ops1, 4), 0);
+  cast<MachineSDNode>(Chain.getNode())->setMemRefs(MemRefs0, MemRefs0 + 1);
+
+  ReplaceUses(N.getValue(0), Chain);
+  return Chain.getNode();
+}
+
 /// Select instructions not customized! Used for
 /// expanded, promoted and normal instructions
 SDNode* MipsDAGToDAGISel::Select(SDValue N) {
@@ -345,6 +462,18 @@ SDNode* MipsDAGToDAGISel::Select(SDValue N) {
       break;
     }
 
+    case ISD::LOAD:
+      if (SDNode *ResNode = SelectLoadFp64(N))
+        return ResNode;
+      // Other cases are autogenerated.
+      break;
+
+    case ISD::STORE:
+      if (SDNode *ResNode = SelectStoreFp64(N))
+        return ResNode;
+      // Other cases are autogenerated.
+      break;
+
     /// Handle direct and indirect calls when using PIC. On PIC, when 
     /// GOT is smaller than about 64k (small code) the GA target is 
     /// loaded with only one instruction. Otherwise GA's target must 
diff --git a/lib/Target/Mips/MipsRegisterInfo.h b/lib/Target/Mips/MipsRegisterInfo.h
index 5b45921..6a3ec00 100644
--- a/lib/Target/Mips/MipsRegisterInfo.h
+++ b/lib/Target/Mips/MipsRegisterInfo.h
@@ -23,6 +23,15 @@ class MipsSubtarget;
 class TargetInstrInfo;
 class Type;
 
+namespace Mips {
+  /// SubregIndex - The index of various sized subregister classes. Note that 
+  /// these indices must be kept in sync with the class indices in the 
+  /// MipsRegisterInfo.td file.
+  enum SubregIndex {
+    SUBREG_FPEVEN = 1, SUBREG_FPODD = 2
+  };
+}
+
 struct MipsRegisterInfo : public MipsGenRegisterInfo {
   const MipsSubtarget &Subtarget;
   const TargetInstrInfo &TII;
diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td
index bbb275c..00e7723 100644
--- a/lib/Target/Mips/MipsRegisterInfo.td
+++ b/lib/Target/Mips/MipsRegisterInfo.td
@@ -17,6 +17,12 @@ class MipsReg<string n> : Register<n> {
   let Namespace = "Mips";
 }
 
+class MipsRegWithSubRegs<string n, list<Register> subregs> 
+  : RegisterWithSubRegs<n, subregs> {
+  field bits<5> Num;
+  let Namespace = "Mips";
+}
+
 // Mips CPU Registers
 class MipsGPRReg<bits<5> num, string n> : MipsReg<n> {
   let Num = num;
@@ -28,9 +34,9 @@ class FPR<bits<5> num, string n> : MipsReg<n> {
 }
 
 // Mips 64-bit (aliased) FPU Registers
-class AFPR<bits<5> num, string n, list<Register> aliases> : MipsReg<n> {
+class AFPR<bits<5> num, string n, list<Register> subregs> 
+  : MipsRegWithSubRegs<n, subregs> {
   let Num = num;
-  let Aliases = aliases;
 }
 
 //===----------------------------------------------------------------------===//
@@ -135,6 +141,23 @@ let Namespace = "Mips" in {
 }
 
 //===----------------------------------------------------------------------===//
+// Subregister Set Definitions
+//===----------------------------------------------------------------------===//
+
+def mips_subreg_fpeven : PatLeaf<(i32 1)>;
+def mips_subreg_fpodd  : PatLeaf<(i32 2)>;
+
+def : SubRegSet<1, [D0, D1, D2, D3, D4, D5, D6, D7, 
+                    D8, D9, D10, D11, D12, D13, D14, D15],
+                   [F0, F2, F4, F6, F8, F10, F12, F14,
+                    F16, F18, F20, F22, F24, F26, F28, F30]>;
+
+def : SubRegSet<2, [D0, D1, D2, D3, D4, D5, D6, D7, 
+                    D8, D9, D10, D11, D12, D13, D14, D15],
+                   [F1, F3, F5, F7, F9, F11, F13, F15,
+                    F17, F19, F21, F23, F25, F27, F29, F31]>;
+
+//===----------------------------------------------------------------------===//
 // Register Classes
 //===----------------------------------------------------------------------===//
 
@@ -232,6 +255,7 @@ def AFGR64 : RegisterClass<"Mips", [f64], 64,
   // Reserved
   D15]>
 {
+  let SubRegClassList = [FGR32, FGR32];
   let MethodProtos = [{
     iterator allocation_order_end(const MachineFunction &MF) const;
   }];
diff --git a/lib/Target/Mips/MipsTargetObjectFile.cpp b/lib/Target/Mips/MipsTargetObjectFile.cpp
index 85e9d65..0fb423d 100644
--- a/lib/Target/Mips/MipsTargetObjectFile.cpp
+++ b/lib/Target/Mips/MipsTargetObjectFile.cpp
@@ -8,6 +8,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "MipsTargetObjectFile.h"
+#include "MipsSubtarget.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/GlobalVariable.h"
 #include "llvm/MC/MCSectionELF.h"
@@ -56,6 +57,12 @@ bool MipsTargetObjectFile::IsGlobalInSmallSection(const GlobalValue *GV,
 bool MipsTargetObjectFile::
 IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM,
                        SectionKind Kind) const {
+
+  // Only use small section for non linux targets.
+  const MipsSubtarget &Subtarget = TM.getSubtarget<MipsSubtarget>();
+  if (Subtarget.isLinux())
+    return false;
+
   // Only global variables, not functions.
   const GlobalVariable *GVA = dyn_cast<GlobalVariable>(GV);
   if (!GVA)
diff --git a/lib/Target/SubtargetFeature.cpp b/lib/Target/SubtargetFeature.cpp
index 664a43c..590574e 100644
--- a/lib/Target/SubtargetFeature.cpp
+++ b/lib/Target/SubtargetFeature.cpp
@@ -357,3 +357,30 @@ void SubtargetFeatures::print(raw_ostream &OS) const {
 void SubtargetFeatures::dump() const {
   print(errs());
 }
+
+/// getDefaultSubtargetFeatures - Build the comma-separated feature string
+/// implied by a target triple's vendor and architecture.
+///
+/// FIXME: This is an inelegant way of specifying the features of a
+/// subtarget. It would be better if we could encode this information
+/// into the IR. See <rdar://5972456>.
+///
+std::string SubtargetFeatures::getDefaultSubtargetFeatures(
+                                               const Triple& Triple) {
+  // Only Apple triples imply any default features; everything else
+  // gets the empty feature string.
+  if (Triple.getVendor() != Triple::Apple)
+    return std::string("");
+
+  switch (Triple.getArch()) {
+  case Triple::ppc:   // powerpc-apple-*
+    return std::string("altivec");
+  case Triple::ppc64: // powerpc64-apple-*
+    return std::string("64bit,altivec");
+  default:
+    break;
+  }
+
+  // Apple vendor, but not a PowerPC architecture.
+  return std::string("");
+}
diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp
index f887523..6fdbc92 100644
--- a/lib/Target/TargetLoweringObjectFile.cpp
+++ b/lib/Target/TargetLoweringObjectFile.cpp
@@ -783,8 +783,8 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx,
   }
 
   // Exception Handling.
-  LSDASection = getMachOSection("__DATA", "__gcc_except_tab", 0,
-                                SectionKind::getDataRel());
+  LSDASection = getMachOSection("__TEXT", "__gcc_except_tab", 0,
+                                SectionKind::getReadOnlyWithRel());
   EHFrameSection =
     getMachOSection("__TEXT", "__eh_frame",
                     MCSectionMachO::S_COALESCED |
diff --git a/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp b/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp
index d7106a0..2a561c6 100644
--- a/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp
+++ b/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp
@@ -333,6 +333,9 @@ void XCoreAsmPrinter::printOperand(const MachineInstr *MI, int opNum) {
   case MachineOperand::MO_JumpTableIndex:
     O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
       << '_' << MO.getIndex();
+    break;
+  case MachineOperand::MO_BlockAddress:
+    GetBlockAddressSymbol(MO.getBlockAddress())->print(O, MAI);
     break;
   default:
     llvm_unreachable("not implemented");
diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp
index 16e68fe..00dcce6 100644
--- a/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/lib/Target/XCore/XCoreISelLowering.cpp
@@ -111,7 +111,8 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM)
   setOperationAction(ISD::JumpTable, MVT::i32, Custom);
 
   setOperationAction(ISD::GlobalAddress, MVT::i32,   Custom);
-  
+  setOperationAction(ISD::BlockAddress, MVT::i32 , Custom);
+
   // Thread Local Storage
   setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
   
@@ -158,6 +159,7 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) {
   {
   case ISD::GlobalAddress:    return LowerGlobalAddress(Op, DAG);
   case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
+  case ISD::BlockAddress:     return LowerBlockAddress(Op, DAG);
   case ISD::ConstantPool:     return LowerConstantPool(Op, DAG);
   case ISD::JumpTable:        return LowerJumpTable(Op, DAG);
   case ISD::LOAD:             return LowerLOAD(Op, DAG);
@@ -288,6 +290,17 @@ LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG)
 }
 
 SDValue XCoreTargetLowering::
+LowerBlockAddress(SDValue Op, SelectionDAG &DAG)
+{
+  DebugLoc DL = Op.getDebugLoc();
+
+  BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
+  SDValue Result = DAG.getBlockAddress(BA, DL, /*isTarget=*/true);
+
+  return DAG.getNode(XCoreISD::PCRelativeWrapper, DL, getPointerTy(), Result);
+}
+
+SDValue XCoreTargetLowering::
 LowerConstantPool(SDValue Op, SelectionDAG &DAG)
 {
   ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h
index 10631af..f86be5e 100644
--- a/lib/Target/XCore/XCoreISelLowering.h
+++ b/lib/Target/XCore/XCoreISelLowering.h
@@ -120,6 +120,7 @@ namespace llvm {
     SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG);
     SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG);
     SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG);
+    SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG);
     SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG);
     SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG);
     SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG);
diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td
index 4ed4ed4..d4ae49e 100644
--- a/lib/Target/XCore/XCoreInstrInfo.td
+++ b/lib/Target/XCore/XCoreInstrInfo.td
@@ -679,6 +679,12 @@ def LDAP_lu10 : _FLU10<
                   "ldap r11, $addr",
                   [(set R11, (pcrelwrapper tglobaladdr:$addr))]>;
 
+let Defs = [R11], isReMaterializable = 1 in
+def LDAP_lu10_ba : _FLU10<(outs),
+                          (ins i32imm:$addr),
+                          "ldap r11, $addr",
+                          [(set R11, (pcrelwrapper tblockaddress:$addr))]>;
+
 let isCall=1,
 // All calls clobber the the link register and the non-callee-saved registers:
 Defs = [R0, R1, R2, R3, R11, LR] in {
diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp
index 2ab0972..44a2c1f 100644
--- a/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/lib/Transforms/Utils/LoopSimplify.cpp
@@ -305,6 +305,12 @@ ReprocessLoop:
     }
   }
 
+  // If there are duplicate phi nodes (for example, from loop rotation),
+  // get rid of them.
+  for (Loop::block_iterator BB = L->block_begin(), E = L->block_end();
+       BB != E; ++BB)
+    EliminateDuplicatePHINodes(*BB);
+
   return Changed;
 }
 
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index 8dbc808..89b0bd9 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -1594,7 +1594,7 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
 /// which differ only in the order of the incoming values, but instcombine
 /// orders them so it usually won't matter.
 ///
-static bool EliminateDuplicatePHINodes(BasicBlock *BB) {
+bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) {
   bool Changed = false;
   
   // This implementation doesn't currently consider undef operands
diff --git a/test/CodeGen/ARM/tail-opts.ll b/test/CodeGen/ARM/tail-opts.ll
new file mode 100644
index 0000000..1a867a9
--- /dev/null
+++ b/test/CodeGen/ARM/tail-opts.ll
@@ -0,0 +1,64 @@
+; RUN: llc < %s -mtriple=arm-apple-darwin -mcpu=cortex-a8 -asm-verbose=false | FileCheck %s
+
+declare void @bar(i32)
+declare void @car(i32)
+declare void @dar(i32)
+declare void @ear(i32)
+declare void @far(i32)
+declare i1 @qux()
+
+@GHJK = global i32 0
+
+declare i8* @choose(i8*, i8*);
+
+; BranchFolding should tail-duplicate the indirect jump to avoid
+; redundant branching.
+
+; CHECK: tail_duplicate_me:
+; CHECK:      qux
+; CHECK:      qux
+; CHECK:      ldr r{{.}}, LCPI
+; CHECK:      str r
+; CHECK-NEXT: bx r
+; CHECK:      ldr r{{.}}, LCPI
+; CHECK:      str r
+; CHECK-NEXT: bx r
+; CHECK:      ldr r{{.}}, LCPI
+; CHECK:      str r
+; CHECK-NEXT: bx r
+
+define void @tail_duplicate_me() nounwind {
+entry:
+  %a = call i1 @qux()
+  %c = call i8* @choose(i8* blockaddress(@tail_duplicate_me, %return),
+                        i8* blockaddress(@tail_duplicate_me, %altret))
+  br i1 %a, label %A, label %next
+next:
+  %b = call i1 @qux()
+  br i1 %b, label %B, label %C
+
+A:
+  call void @bar(i32 0)
+  store i32 0, i32* @GHJK
+  br label %M
+
+B:
+  call void @car(i32 1)
+  store i32 0, i32* @GHJK
+  br label %M
+
+C:
+  call void @dar(i32 2)
+  store i32 0, i32* @GHJK
+  br label %M
+
+M:
+  indirectbr i8* %c, [label %return, label %altret]
+
+return:
+  call void @ear(i32 1000)
+  ret void
+altret:
+  call void @far(i32 1001)
+  ret void
+}
diff --git a/test/CodeGen/Mips/2008-08-03-ReturnDouble.ll b/test/CodeGen/Mips/2008-08-03-ReturnDouble.ll
index 1244a3e..c41d521 100644
--- a/test/CodeGen/Mips/2008-08-03-ReturnDouble.ll
+++ b/test/CodeGen/Mips/2008-08-03-ReturnDouble.ll
@@ -1,7 +1,6 @@
 ; Double return in abicall (default)
 ; RUN: llc < %s -march=mips
 ; PR2615
-; XFAIL: *
 
 define double @main(...) {
 entry:
diff --git a/test/CodeGen/Thumb/pop.ll b/test/CodeGen/Thumb/pop.ll
index c5e86ad..0e1b2e5 100644
--- a/test/CodeGen/Thumb/pop.ll
+++ b/test/CodeGen/Thumb/pop.ll
@@ -4,7 +4,7 @@
 define arm_apcscc void @t(i8* %a, ...) nounwind {
 ; CHECK:      t:
 ; CHECK:      pop {r3}
-; CHECK-NEXT: add sp, #3 * 4
+; CHECK-NEXT: add sp, #12
 ; CHECK-NEXT: bx r3
 entry:
   %a.addr = alloca i8*
diff --git a/test/CodeGen/Thumb2/2009-07-21-ISelBug.ll b/test/CodeGen/Thumb2/2009-07-21-ISelBug.ll
index ec649c3..ef076a4 100644
--- a/test/CodeGen/Thumb2/2009-07-21-ISelBug.ll
+++ b/test/CodeGen/Thumb2/2009-07-21-ISelBug.ll
@@ -6,7 +6,7 @@
 define arm_apcscc i32 @t(i32, ...) nounwind {
 entry:
 ; CHECK: t:
-; CHECK: add r7, sp, #3 * 4
+; CHECK: add r7, sp, #12
 	%1 = load i8** undef, align 4		; <i8*> [#uses=3]
 	%2 = getelementptr i8* %1, i32 4		; <i8*> [#uses=1]
 	%3 = getelementptr i8* %1, i32 8		; <i8*> [#uses=1]
diff --git a/test/CodeGen/Thumb2/2009-08-06-SpDecBug.ll b/test/CodeGen/Thumb2/2009-08-06-SpDecBug.ll
index 03f9fac..4077535 100644
--- a/test/CodeGen/Thumb2/2009-08-06-SpDecBug.ll
+++ b/test/CodeGen/Thumb2/2009-08-06-SpDecBug.ll
@@ -6,7 +6,7 @@ define hidden arm_aapcscc i32 @__gcov_execlp(i8* %path, i8* %arg, ...) nounwind
 entry:
 ; CHECK: __gcov_execlp:
 ; CHECK: mov sp, r7
-; CHECK: sub sp, #1 * 4
+; CHECK: sub sp, #4
 	call arm_aapcscc  void @__gcov_flush() nounwind
 	br i1 undef, label %bb5, label %bb
 
diff --git a/test/CodeGen/Thumb2/large-stack.ll b/test/CodeGen/Thumb2/large-stack.ll
index 18d507c..6f59961 100644
--- a/test/CodeGen/Thumb2/large-stack.ll
+++ b/test/CodeGen/Thumb2/large-stack.ll
@@ -2,7 +2,7 @@
 
 define void @test1() {
 ; CHECK: test1:
-; CHECK: sub sp, #64 * 4
+; CHECK: sub sp, #256
     %tmp = alloca [ 64 x i32 ] , align 4
     ret void
 }
@@ -10,7 +10,7 @@ define void @test1() {
 define void @test2() {
 ; CHECK: test2:
 ; CHECK: sub.w sp, sp, #4160
-; CHECK: sub sp, #2 * 4
+; CHECK: sub sp, #8
     %tmp = alloca [ 4168 x i8 ] , align 4
     ret void
 }
@@ -18,7 +18,7 @@ define void @test2() {
 define i32 @test3() {
 ; CHECK: test3:
 ; CHECK: sub.w sp, sp, #805306368
-; CHECK: sub sp, #6 * 4
+; CHECK: sub sp, #24
     %retval = alloca i32, align 4
     %tmp = alloca i32, align 4
     %a = alloca [805306369 x i8], align 16
diff --git a/test/CodeGen/Thumb2/ldr-str-imm12.ll b/test/CodeGen/Thumb2/ldr-str-imm12.ll
index 4c8ffe8..7cbe260 100644
--- a/test/CodeGen/Thumb2/ldr-str-imm12.ll
+++ b/test/CodeGen/Thumb2/ldr-str-imm12.ll
@@ -49,6 +49,12 @@ bb119:                                            ; preds = %bb20, %bb20
   unreachable
 
 bb420:                                            ; preds = %bb20, %bb20
+; CHECK: bb420
+; CHECK: str r{{[0-7]}}, [sp]
+; CHECK: str r{{[0-7]}}, [sp, #+4]
+; CHECK: str r{{[0-7]}}, [sp, #+8]
+; CHECK: ldr r{{[0-7]}}, [sp, #+28]
+; CHECK: str r{{[0-7]}}, [sp, #+24]
   store %union.rec* null, %union.rec** @zz_hold, align 4
   store %union.rec* null, %union.rec** @zz_res, align 4
   store %union.rec* %x, %union.rec** @zz_hold, align 4
diff --git a/test/CodeGen/X86/2009-11-18-TwoAddrKill.ll b/test/CodeGen/X86/2009-11-18-TwoAddrKill.ll
new file mode 100644
index 0000000..0edaa70
--- /dev/null
+++ b/test/CodeGen/X86/2009-11-18-TwoAddrKill.ll
@@ -0,0 +1,29 @@
+; RUN: llc < %s
+; PR 5300
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
+target triple = "i386-pc-linux-gnu"
+
+@g_296 = external global i8, align 1              ; <i8*> [#uses=1]
+
+define noalias i8** @func_31(i32** nocapture %int8p_33, i8** nocapture %p_34, i8* nocapture %p_35) nounwind {
+entry:
+  %cmp.i = icmp sgt i16 undef, 234                ; <i1> [#uses=1]
+  %tmp17 = select i1 %cmp.i, i16 undef, i16 0     ; <i16> [#uses=2]
+  %conv8 = trunc i16 %tmp17 to i8                 ; <i8> [#uses=3]
+  br i1 undef, label %cond.false.i29, label %land.lhs.true.i
+
+land.lhs.true.i:                                  ; preds = %entry
+  %tobool5.i = icmp eq i32 undef, undef           ; <i1> [#uses=1]
+  br i1 %tobool5.i, label %cond.false.i29, label %bar.exit
+
+cond.false.i29:                                   ; preds = %land.lhs.true.i, %entry
+  %tmp = sub i8 0, %conv8                         ; <i8> [#uses=1]
+  %mul.i = and i8 %conv8, %tmp                    ; <i8> [#uses=1]
+  br label %bar.exit
+
+bar.exit:                                         ; preds = %cond.false.i29, %land.lhs.true.i
+  %call1231 = phi i8 [ %mul.i, %cond.false.i29 ], [ %conv8, %land.lhs.true.i ] ; <i8> [#uses=0]
+  %conv21 = trunc i16 %tmp17 to i8                ; <i8> [#uses=1]
+  store i8 %conv21, i8* @g_296
+  ret i8** undef
+}
diff --git a/test/CodeGen/X86/unaligned-load.ll b/test/CodeGen/X86/unaligned-load.ll
new file mode 100644
index 0000000..7dddcda
--- /dev/null
+++ b/test/CodeGen/X86/unaligned-load.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -relocation-model=dynamic-no-pic | not grep {movaps\t_.str3}
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -relocation-model=dynamic-no-pic | FileCheck %s
+
+@.str1 = internal constant [31 x i8] c"DHRYSTONE PROGRAM, SOME STRING\00", align 8
+@.str3 = internal constant [31 x i8] c"DHRYSTONE PROGRAM, 2'ND STRING\00", align 8
+
+define void @func() nounwind ssp {
+entry:
+  %String2Loc = alloca [31 x i8], align 1
+  br label %bb
+
+bb:
+  %String2Loc9 = getelementptr inbounds [31 x i8]* %String2Loc, i64 0, i64 0
+  call void @llvm.memcpy.i64(i8* %String2Loc9, i8* getelementptr inbounds ([31 x i8]* @.str3, i64 0, i64 0), i64 31, i32 1)
+; CHECK: movups _.str3
+  br label %bb
+
+return:
+  ret void
+}
+
+declare void @llvm.memcpy.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind
+
+; CHECK: .align  3
+; CHECK-NEXT: _.str1:
+; CHECK-NEXT: .asciz "DHRYSTONE PROGRAM, SOME STRING"
+; CHECK-NEXT: .align 3
+; CHECK-NEXT: _.str3:
diff --git a/test/CodeGen/XCore/indirectbr.ll b/test/CodeGen/XCore/indirectbr.ll
new file mode 100644
index 0000000..a8f00cc
--- /dev/null
+++ b/test/CodeGen/XCore/indirectbr.ll
@@ -0,0 +1,45 @@
+; RUN: llc < %s -march=xcore | FileCheck %s
+
+@nextaddr = global i8* null                       ; <i8**> [#uses=2]
+@C.0.2070 = private constant [5 x i8*] [i8* blockaddress(@foo, %L1), i8* blockaddress(@foo, %L2), i8* blockaddress(@foo, %L3), i8* blockaddress(@foo, %L4), i8* blockaddress(@foo, %L5)] ; <[5 x i8*]*> [#uses=1]
+
+define internal i32 @foo(i32 %i) nounwind {
+; CHECK: foo:
+entry:
+  %0 = load i8** @nextaddr, align 4               ; <i8*> [#uses=2]
+  %1 = icmp eq i8* %0, null                       ; <i1> [#uses=1]
+  br i1 %1, label %bb3, label %bb2
+
+bb2:                                              ; preds = %entry, %bb3
+  %gotovar.4.0 = phi i8* [ %gotovar.4.0.pre, %bb3 ], [ %0, %entry ] ; <i8*> [#uses=1]
+; CHECK: bau
+  indirectbr i8* %gotovar.4.0, [label %L5, label %L4, label %L3, label %L2, label %L1]
+
+bb3:                                              ; preds = %entry
+  %2 = getelementptr inbounds [5 x i8*]* @C.0.2070, i32 0, i32 %i ; <i8**> [#uses=1]
+  %gotovar.4.0.pre = load i8** %2, align 4        ; <i8*> [#uses=1]
+  br label %bb2
+
+L5:                                               ; preds = %bb2
+  br label %L4
+
+L4:                                               ; preds = %L5, %bb2
+  %res.0 = phi i32 [ 385, %L5 ], [ 35, %bb2 ]     ; <i32> [#uses=1]
+  br label %L3
+
+L3:                                               ; preds = %L4, %bb2
+  %res.1 = phi i32 [ %res.0, %L4 ], [ 5, %bb2 ]   ; <i32> [#uses=1]
+  br label %L2
+
+L2:                                               ; preds = %L3, %bb2
+  %res.2 = phi i32 [ %res.1, %L3 ], [ 1, %bb2 ]   ; <i32> [#uses=1]
+  %phitmp = mul i32 %res.2, 6                     ; <i32> [#uses=1]
+  br label %L1
+
+L1:                                               ; preds = %L2, %bb2
+  %res.3 = phi i32 [ %phitmp, %L2 ], [ 2, %bb2 ]  ; <i32> [#uses=1]
+; CHECK: ldap r11, .LBA3_foo_L5
+; CHECK: stw r11, dp[nextaddr]
+  store i8* blockaddress(@foo, %L5), i8** @nextaddr, align 4
+  ret i32 %res.3
+}
diff --git a/test/FrontendC++/2009-07-15-LineNumbers.cpp b/test/FrontendC++/2009-07-15-LineNumbers.cpp
index 21e096c..d603aa7 100644
--- a/test/FrontendC++/2009-07-15-LineNumbers.cpp
+++ b/test/FrontendC++/2009-07-15-LineNumbers.cpp
@@ -1,7 +1,7 @@
 // This is a regression test on debug info to make sure that we can
 // print line numbers in asm.
 // RUN: %llvmgcc -S -O0 -g %s -o - | llvm-as | \
-// RUN:    llc --disable-fp-elim -O0 -relocation-model=pic | grep {# SrcLine 25}
+// RUN:    llc --disable-fp-elim -O0 -relocation-model=pic | grep {SrcLine 25}
 
 #include <stdlib.h>
 
diff --git a/tools/lto/LTOCodeGenerator.cpp b/tools/lto/LTOCodeGenerator.cpp
index eb82f98..0b9cb29 100644
--- a/tools/lto/LTOCodeGenerator.cpp
+++ b/tools/lto/LTOCodeGenerator.cpp
@@ -305,7 +305,8 @@ bool LTOCodeGenerator::determineTarget(std::string& errMsg)
         }
 
         // construct LTModule, hand over ownership of module and target
-        std::string FeatureStr = getFeatureString(Triple.c_str());
+        const std::string FeatureStr = 
+            SubtargetFeatures::getDefaultSubtargetFeatures(llvm::Triple(Triple));
         _target = march->createTargetMachine(Triple, FeatureStr);
     }
     return false;
diff --git a/tools/lto/LTOModule.cpp b/tools/lto/LTOModule.cpp
index e1cf48d..bce4162 100644
--- a/tools/lto/LTOModule.cpp
+++ b/tools/lto/LTOModule.cpp
@@ -19,6 +19,7 @@
 #include "llvm/Module.h"
 #include "llvm/ModuleProvider.h"
 #include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/Triple.h"
 #include "llvm/Bitcode/ReaderWriter.h"
 #include "llvm/Support/SystemUtils.h"
 #include "llvm/Support/Mangler.h"
@@ -120,27 +121,6 @@ LTOModule* LTOModule::makeLTOModule(const void* mem, size_t length,
     return makeLTOModule(buffer.get(), errMsg);
 }
 
-/// getFeatureString - Return a string listing the features associated with the
-/// target triple.
-///
-/// FIXME: This is an inelegant way of specifying the features of a
-/// subtarget. It would be better if we could encode this information into the
-/// IR. See <rdar://5972456>.
-std::string getFeatureString(const char *TargetTriple) {
-  InitializeAllTargets();
-
-  SubtargetFeatures Features;
-
-  if (strncmp(TargetTriple, "powerpc-apple-", 14) == 0) {
-    Features.AddFeature("altivec", true);
-  } else if (strncmp(TargetTriple, "powerpc64-apple-", 16) == 0) {
-    Features.AddFeature("64bit", true);
-    Features.AddFeature("altivec", true);
-  }
-
-  return Features.getString();
-}
-
 LTOModule* LTOModule::makeLTOModule(MemoryBuffer* buffer,
                                     std::string& errMsg)
 {
@@ -161,7 +141,8 @@ LTOModule* LTOModule::makeLTOModule(MemoryBuffer* buffer,
         return NULL;
 
     // construct LTModule, hand over ownership of module and target
-    std::string FeatureStr = getFeatureString(Triple.c_str());
+    const std::string FeatureStr = 
+        SubtargetFeatures::getDefaultSubtargetFeatures(llvm::Triple(Triple));
     TargetMachine* target = march->createTargetMachine(Triple, FeatureStr);
     return new LTOModule(m.take(), target);
 }
diff --git a/tools/lto/LTOModule.h b/tools/lto/LTOModule.h
index 8fd3915..4019e01 100644
--- a/tools/lto/LTOModule.h
+++ b/tools/lto/LTOModule.h
@@ -107,7 +107,5 @@ private:
     llvm::StringMap<NameAndAttributes>      _undefines;
 };
 
-extern std::string getFeatureString(const char *TargetTriple);
-
 #endif // LTO_MODULE_H
 
diff --git a/utils/TableGen/CMakeLists.txt b/utils/TableGen/CMakeLists.txt
index e568c62..d9ec6f7 100644
--- a/utils/TableGen/CMakeLists.txt
+++ b/utils/TableGen/CMakeLists.txt
@@ -13,6 +13,7 @@ add_executable(tblgen
   InstrInfoEmitter.cpp
   IntrinsicEmitter.cpp
   LLVMCConfigurationEmitter.cpp
+  OptParserEmitter.cpp
   Record.cpp
   RegisterInfoEmitter.cpp
   SubtargetEmitter.cpp
diff --git a/utils/TableGen/OptParserEmitter.cpp b/utils/TableGen/OptParserEmitter.cpp
new file mode 100644
index 0000000..a09ba08
--- /dev/null
+++ b/utils/TableGen/OptParserEmitter.cpp
@@ -0,0 +1,192 @@
+//===- OptParserEmitter.cpp - Table Driven Command Line Parsing -----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "OptParserEmitter.h"
+#include "Record.h"
+#include "llvm/ADT/STLExtras.h"
+using namespace llvm;
+
+/// StrCmpOptionName - Compare two option names. A name that is a proper
+/// prefix of the other sorts after it; otherwise order by first mismatch.
+static int StrCmpOptionName(const char *A, const char *B) {
+  // Skip the common prefix; identical strings compare equal.
+  for (; *A == *B; ++A, ++B)
+    if (*A == '\0')
+      return 0;
+
+  const char Lhs = *A, Rhs = *B;
+
+  if (Lhs == '\0') // A ended first: A is a prefix of B.
+    return 1;
+  if (Rhs == '\0') // B ended first: B is a prefix of A.
+    return -1;
+
+  // Neither string ended, so the mismatching characters decide.
+  return Lhs < Rhs ? -1 : 1;
+}
+
+/// CompareOptionRecords - Sort by option name, then by kind precedence.
+static int CompareOptionRecords(const void *Av, const void *Bv) {
+  const Record *LHS = *static_cast<Record *const *>(Av);
+  const Record *RHS = *static_cast<Record *const *>(Bv);
+
+  const std::string LName = LHS->getValueAsString("Name");
+  const std::string RName = RHS->getValueAsString("Name");
+  if (int NameOrder = StrCmpOptionName(LName.c_str(), RName.c_str()))
+    return NameOrder;
+
+  int LPrec = LHS->getValueAsDef("Kind")->getValueAsInt("Precedence");
+  int RPrec = RHS->getValueAsDef("Kind")->getValueAsInt("Precedence");
+  assert(LPrec != RPrec && "Options are equivalent!");
+  return LPrec < RPrec ? -1 : 1;
+}
+
+/// getOptionName - Identifier for \p R: its EnumName if set, else its name.
+static const std::string getOptionName(const Record &R) {
+  if (!dynamic_cast<UnsetInit*>(R.getValueInit("EnumName")))
+    return R.getValueAsString("EnumName");
+  // EnumName is unset: fall back to the record's own name.
+  return R.getName();
+}
+
+/// write_cstring - Emit \p Str to \p OS as a double-quoted, escaped literal.
+static raw_ostream &write_cstring(raw_ostream &OS, llvm::StringRef Str) {
+  const char Quote = '"';
+  (OS << Quote).write_escaped(Str);
+  return OS << Quote;
+}
+
+/// run - Emit the file banner and, when GenDefs is set, one OPTION(...)
+/// line per option group followed by one per option (sorted beforehand).
+void OptParserEmitter::run(raw_ostream &OS) {
+  // Get the option groups and options.
+  const std::vector<Record*> &Groups =
+    Records.getAllDerivedDefinitions("OptionGroup");
+  std::vector<Record*> Opts = Records.getAllDerivedDefinitions("Option");
+
+  if (GenDefs) {
+    OS << "\
+//=== TableGen'erated File - Option Parsing Definitions ---------*- C++ -*-===//\n\
+//\n\
+// Option Parsing Definitions\n\
+//\n\
+// Automatically generated file, do not edit!\n\
+//\n\
+//===----------------------------------------------------------------------===//\n";
+  } else {
+    OS << "\
+//=== TableGen'erated File - Option Parsing Table ---------------*- C++ -*-===//\n\
+//\n\
+// Option Parsing Definitions\n\
+//\n\
+// Automatically generated file, do not edit!\n\
+//\n\
+//===----------------------------------------------------------------------===//\n";
+  }
+  OS << "\n";
+
+  array_pod_sort(Opts.begin(), Opts.end(), CompareOptionRecords);
+  if (GenDefs) {
+    OS << "#ifndef OPTION\n";
+    OS << "#error \"Define OPTION prior to including this file!\"\n";
+    OS << "#endif\n\n";
+
+    OS << "/////////\n// Groups\n\n";
+    for (unsigned i = 0, e = Groups.size(); i != e; ++i) {
+      const Record &R = *Groups[i];
+
+      // Start a single option entry.
+      OS << "OPTION(";
+
+      // The option string (escaped the same way as for plain options).
+      write_cstring(OS, R.getValueAsString("Name"));
+
+      // The option identifier name.
+      OS  << ", "<< getOptionName(R);
+
+      // The option kind.
+      OS << ", Group";
+
+      // The containing option group (if any).
+      OS << ", ";
+      if (const DefInit *DI = dynamic_cast<DefInit*>(R.getValueInit("Group")))
+        OS << getOptionName(*DI->getDef());
+      else
+        OS << "INVALID";
+
+      // The other option arguments (unused for groups).
+      OS << ", INVALID, 0, 0, 0, 0)\n";
+    }
+    OS << "\n";
+
+    OS << "//////////\n// Options\n\n";
+    for (unsigned i = 0, e = Opts.size(); i != e; ++i) {
+      const Record &R = *Opts[i];
+
+      // Start a single option entry.
+      OS << "OPTION(";
+
+      // The option string.
+      write_cstring(OS, R.getValueAsString("Name"));
+
+      // The option identifier name.
+      OS  << ", "<< getOptionName(R);
+
+      // The option kind.
+      OS << ", " << R.getValueAsDef("Kind")->getValueAsString("Name");
+
+      // The containing option group (if any).
+      OS << ", ";
+      if (const DefInit *DI = dynamic_cast<DefInit*>(R.getValueInit("Group")))
+        OS << getOptionName(*DI->getDef());
+      else
+        OS << "INVALID";
+
+      // The option alias (if any).
+      OS << ", ";
+      if (const DefInit *DI = dynamic_cast<DefInit*>(R.getValueInit("Alias")))
+        OS << getOptionName(*DI->getDef());
+      else
+        OS << "INVALID";
+
+      // The option flags, OR'ed together (or 0 when there are none).
+      const ListInit *LI = R.getValueAsListInit("Flags");
+      if (LI->empty()) {
+        OS << ", 0";
+      } else {
+        OS << ", ";
+        for (unsigned j = 0, n = LI->size(); j != n; ++j) {
+          if (j)
+            OS << " | ";
+          OS << dynamic_cast<DefInit*>(LI->getElement(j))->getDef()->getName();
+        }
+      }
+
+      // The option parameter field.
+      OS << ", " << R.getValueAsInt("NumArgs");
+
+      // The option help text.
+      if (!dynamic_cast<UnsetInit*>(R.getValueInit("HelpText"))) {
+        OS << ",\n";
+        OS << "       ";
+        write_cstring(OS, R.getValueAsString("HelpText"));
+      } else
+        OS << ", 0";
+
+      // The option meta-variable name.
+      OS << ", ";
+      if (!dynamic_cast<UnsetInit*>(R.getValueInit("MetaVarName")))
+        write_cstring(OS, R.getValueAsString("MetaVarName"));
+      else
+        OS << "0";
+
+      OS << ")\n";
+    }
+  }
+}
diff --git a/utils/TableGen/OptParserEmitter.h b/utils/TableGen/OptParserEmitter.h
new file mode 100644
index 0000000..241a3f2
--- /dev/null
+++ b/utils/TableGen/OptParserEmitter.h
@@ -0,0 +1,34 @@
+//===- OptParserEmitter.h - Table Driven Command Line Parsing ---*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef UTILS_TABLEGEN_OPTPARSEREMITTER_H
+#define UTILS_TABLEGEN_OPTPARSEREMITTER_H
+
+#include "TableGenBackend.h"
+
+namespace llvm {
+  /// OptParserEmitter - This tablegen backend takes an input .td file
+  /// describing a list of options and emits a data structure for parsing and
+  /// working with those options when given an input command line.
+  class OptParserEmitter : public TableGenBackend {
+    RecordKeeper &Records;
+    bool GenDefs;
+
+  public:
+    OptParserEmitter(RecordKeeper &R, bool EmitDefs)
+      : Records(R), GenDefs(EmitDefs) {}
+
+    /// run - Output the option parsing information.
+    ///
+    /// \param OS - The stream the generated output is written to.
+    void run(raw_ostream &OS);
+  };
+}
+
+#endif
diff --git a/utils/TableGen/TableGen.cpp b/utils/TableGen/TableGen.cpp
index c6d7502..c6c4306 100644
--- a/utils/TableGen/TableGen.cpp
+++ b/utils/TableGen/TableGen.cpp
@@ -15,21 +15,22 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "Record.h"
-#include "TGParser.h"
+#include "AsmMatcherEmitter.h"
+#include "AsmWriterEmitter.h"
 #include "CallingConvEmitter.h"
+#include "ClangDiagnosticsEmitter.h"
 #include "CodeEmitterGen.h"
-#include "RegisterInfoEmitter.h"
-#include "InstrInfoEmitter.h"
-#include "InstrEnumEmitter.h"
-#include "AsmWriterEmitter.h"
-#include "AsmMatcherEmitter.h"
 #include "DAGISelEmitter.h"
 #include "FastISelEmitter.h"
-#include "SubtargetEmitter.h"
+#include "InstrEnumEmitter.h"
+#include "InstrInfoEmitter.h"
 #include "IntrinsicEmitter.h"
 #include "LLVMCConfigurationEmitter.h"
-#include "ClangDiagnosticsEmitter.h"
+#include "OptParserEmitter.h"
+#include "Record.h"
+#include "RegisterInfoEmitter.h"
+#include "SubtargetEmitter.h"
+#include "TGParser.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/FileUtilities.h"
 #include "llvm/Support/MemoryBuffer.h"
@@ -50,6 +51,7 @@ enum ActionType {
   GenClangDiagGroups,
   GenDAGISel,
   GenFastISel,
+  GenOptParserDefs, GenOptParserImpl,
   GenSubtarget,
   GenIntrinsic,
   GenTgtIntrinsic,
@@ -84,6 +86,10 @@ namespace {
                                "Generate a DAG instruction selector"),
                     clEnumValN(GenFastISel, "gen-fast-isel",
                                "Generate a \"fast\" instruction selector"),
+                    clEnumValN(GenOptParserDefs, "gen-opt-parser-defs",
+                               "Generate option definitions"),
+                    clEnumValN(GenOptParserImpl, "gen-opt-parser-impl",
+                               "Generate option parser implementation"),
                     clEnumValN(GenSubtarget, "gen-subtarget",
                                "Generate subtarget enumerations"),
                     clEnumValN(GenIntrinsic, "gen-intrinsic",
@@ -221,7 +227,13 @@ int main(int argc, char **argv) {
       break;
     case GenClangDiagGroups:
       ClangDiagGroupsEmitter(Records).run(*Out);
-      break;        
+      break;
+    case GenOptParserDefs:
+      OptParserEmitter(Records, true).run(*Out);
+      break;
+    case GenOptParserImpl:
+      OptParserEmitter(Records, false).run(*Out);
+      break;
     case GenDAGISel:
       DAGISelEmitter(Records).run(*Out);
       break;
diff --git a/utils/lit/TestFormats.py b/utils/lit/TestFormats.py
index f067bae..7305c79 100644
--- a/utils/lit/TestFormats.py
+++ b/utils/lit/TestFormats.py
@@ -119,8 +119,9 @@ class OneCommandPerFileTest:
             if not self.recursive:
                 subdirs[:] = []
 
-            if dirname == '.svn' or dirname in localConfig.excludes:
-                continue
+            subdirs[:] = [d for d in subdirs
+                          if (d != '.svn' and
+                              d not in localConfig.excludes)]
 
             for filename in filenames:
                 if (not self.pattern.match(filename) or
-- 
cgit v1.1