277 files changed, 6788 insertions, 4252 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 96e0608..9d0180b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -217,7 +217,9 @@ if( MSVC )
   # List of valid CRTs for MSVC
   set(MSVC_CRT
     MD
-    MDd)
+    MDd
+    MT
+    MTd)
 
   set(LLVM_USE_CRT "" CACHE STRING "Specify VC++ CRT to use for debug/release configurations.")
   add_llvm_definitions( -D_CRT_SECURE_NO_DEPRECATE -D_CRT_SECURE_NO_WARNINGS )
diff --git a/Makefile.rules b/Makefile.rules
index 20e642a..7f298a9 100644
--- a/Makefile.rules
+++ b/Makefile.rules
@@ -578,8 +578,6 @@ endif
 ifeq ($(TARGET_OS),Darwin)
   ifneq ($(ARCH),ARM)
     TargetCommonOpts += -mmacosx-version-min=$(DARWIN_VERSION)
-  else
-    TargetCommonOpts += -marm
   endif
 endif
 
diff --git a/autoconf/configure.ac b/autoconf/configure.ac
index 22f15b7..56d716b 100644
--- a/autoconf/configure.ac
+++ b/autoconf/configure.ac
@@ -31,12 +31,12 @@ dnl===
 dnl===-----------------------------------------------------------------------===
 dnl Initialize autoconf and define the package name, version number and
 dnl email address for reporting bugs.
-AC_INIT([[llvm]],[[2.7svn]],[llvmbugs@cs.uiuc.edu])
+AC_INIT([[llvm]],[[2.8svn]],[llvmbugs@cs.uiuc.edu])
 
 dnl Provide a copyright substitution and ensure the copyright notice is included
 dnl in the output of --version option of the generated configure script.
-AC_SUBST(LLVM_COPYRIGHT,["Copyright (c) 2003-2009 University of Illinois at Urbana-Champaign."])
-AC_COPYRIGHT([Copyright (c) 2003-2009 University of Illinois at Urbana-Champaign.])
+AC_SUBST(LLVM_COPYRIGHT,["Copyright (c) 2003-2010 University of Illinois at Urbana-Champaign."])
+AC_COPYRIGHT([Copyright (c) 2003-2010 University of Illinois at Urbana-Champaign.])
 
 dnl Indicate that we require autoconf 2.59 or later. Ths is needed because we
 dnl use some autoconf macros only available in 2.59.
@@ -62,6 +62,41 @@ dnl Configure all of the projects present in our source tree. While we could
 dnl just AC_CONFIG_SUBDIRS on the set of directories in projects that have a
 dnl configure script, that usage of the AC_CONFIG_SUBDIRS macro is deprecated.
 dnl Instead we match on the known projects.
+
+dnl
+dnl One tricky part of doing this is that some projects depend upon other
+dnl projects.  For example, several projects rely upon the LLVM test suite.
+dnl We want to configure those projects first so that their object trees are
+dnl created before running the configure scripts of projects that depend upon
+dnl them.
+dnl
+
+dnl Several projects use llvm-gcc, so configure that first
+if test -d ${srcdir}/projects/llvm-gcc ; then
+  AC_CONFIG_SUBDIRS([projects/llvm-gcc])
+fi
+
+dnl Several projects use the LLVM test suite, so configure it next.
+if test -d ${srcdir}/projects/test-suite ; then
+  AC_CONFIG_SUBDIRS([projects/test-suite])
+fi
+
+dnl llvm-test is the old name of the test-suite, kept here for backwards
+dnl compatibility
+if test -d ${srcdir}/projects/llvm-test ; then
+  AC_CONFIG_SUBDIRS([projects/llvm-test])
+fi
+
+dnl Some projects use poolalloc; configure that next
+if test -d ${srcdir}/projects/poolalloc ; then
+  AC_CONFIG_SUBDIRS([projects/poolalloc])
+fi
+
+if test -d ${srcdir}/projects/llvm-poolalloc ; then
+  AC_CONFIG_SUBDIRS([projects/llvm-poolalloc])
+fi
+
+dnl Check for all other projects
 for i in `ls ${srcdir}/projects`
 do
   if test -d ${srcdir}/projects/${i} ; then
@@ -70,16 +105,9 @@ do
       sample)       AC_CONFIG_SUBDIRS([projects/sample])    ;;
       privbracket)  AC_CONFIG_SUBDIRS([projects/privbracket]) ;;
       llvm-stacker) AC_CONFIG_SUBDIRS([projects/llvm-stacker]) ;;
-      # llvm-test is the old name of the test-suite, kept here for backwards
-      # compatibility
-      llvm-test)    AC_CONFIG_SUBDIRS([projects/llvm-test]) ;;
-      test-suite)   AC_CONFIG_SUBDIRS([projects/test-suite]) ;;
       llvm-reopt)   AC_CONFIG_SUBDIRS([projects/llvm-reopt]);;
-      llvm-gcc)     AC_CONFIG_SUBDIRS([projects/llvm-gcc])  ;;
       llvm-java)    AC_CONFIG_SUBDIRS([projects/llvm-java]) ;;
       llvm-tv)      AC_CONFIG_SUBDIRS([projects/llvm-tv])   ;;
-      llvm-poolalloc) AC_CONFIG_SUBDIRS([projects/llvm-poolalloc]) ;;
-      poolalloc)    AC_CONFIG_SUBDIRS([projects/poolalloc]) ;;
       safecode)     AC_CONFIG_SUBDIRS([projects/safecode]) ;;
       llvm-kernel)  AC_CONFIG_SUBDIRS([projects/llvm-kernel]) ;;
       *)
diff --git a/configure b/configure
index 175297e..a2aad3e 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
 #! /bin/sh
 # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.60 for llvm 2.7svn.
+# Generated by GNU Autoconf 2.60 for llvm 2.8svn.
 #
 # Report bugs to <llvmbugs@cs.uiuc.edu>.
 #
@@ -9,7 +9,7 @@
 # This configure script is free software; the Free Software Foundation
 # gives unlimited permission to copy, distribute and modify it.
 #
-# Copyright (c) 2003-2009 University of Illinois at Urbana-Champaign.
+# Copyright (c) 2003-2010 University of Illinois at Urbana-Champaign.
 ## --------------------- ##
 ## M4sh Initialization.  ##
 ## --------------------- ##
@@ -561,8 +561,8 @@ SHELL=${CONFIG_SHELL-/bin/sh}
 # Identity of this package.
 PACKAGE_NAME='llvm'
 PACKAGE_TARNAME='-llvm-'
-PACKAGE_VERSION='2.7svn'
-PACKAGE_STRING='llvm 2.7svn'
+PACKAGE_VERSION='2.8svn'
+PACKAGE_STRING='llvm 2.8svn'
 PACKAGE_BUGREPORT='llvmbugs@cs.uiuc.edu'
 
 ac_unique_file="lib/VMCore/Module.cpp"
@@ -802,17 +802,17 @@ CPP
 CXX
 CXXFLAGS
 CCC'
-ac_subdirs_all='projects/sample
+ac_subdirs_all='projects/llvm-gcc
+projects/test-suite
+projects/llvm-test
+projects/poolalloc
+projects/llvm-poolalloc
+projects/sample
 projects/privbracket
 projects/llvm-stacker
-projects/llvm-test
-projects/test-suite
 projects/llvm-reopt
-projects/llvm-gcc
 projects/llvm-java
 projects/llvm-tv
-projects/llvm-poolalloc
-projects/poolalloc
 projects/safecode
 projects/llvm-kernel'
 
@@ -1316,7 +1316,7 @@ if test "$ac_init_help" = "long"; then
   # Omit some internal or obsolete options to make the list less imposing.
   # This message is too long to be a string in the A/UX 3.1 sh.
   cat <<_ACEOF
-\`configure' configures llvm 2.7svn to adapt to many kinds of systems.
+\`configure' configures llvm 2.8svn to adapt to many kinds of systems.
 
 Usage: $0 [OPTION]... [VAR=VALUE]...
 
@@ -1382,7 +1382,7 @@ fi
 
 if test -n "$ac_init_help"; then
   case $ac_init_help in
-     short | recursive ) echo "Configuration of llvm 2.7svn:";;
+     short | recursive ) echo "Configuration of llvm 2.8svn:";;
    esac
   cat <<\_ACEOF
 
@@ -1533,7 +1533,7 @@ fi
 test -n "$ac_init_help" && exit $ac_status
 if $ac_init_version; then
   cat <<\_ACEOF
-llvm configure 2.7svn
+llvm configure 2.8svn
 generated by GNU Autoconf 2.60
 
 Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001,
@@ -1541,7 +1541,7 @@ Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001,
 This configure script is free software; the Free Software Foundation
 gives unlimited permission to copy, distribute and modify it.
 
-Copyright (c) 2003-2009 University of Illinois at Urbana-Champaign.
+Copyright (c) 2003-2010 University of Illinois at Urbana-Champaign.
 _ACEOF
   exit
 fi
@@ -1549,7 +1549,7 @@ cat >config.log <<_ACEOF
 This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.
 
-It was created by llvm $as_me 2.7svn, which was
+It was created by llvm $as_me 2.8svn, which was
 generated by GNU Autoconf 2.60.  Invocation command line was
 
   $ $0 $@
@@ -1903,7 +1903,7 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
 
 
 
-LLVM_COPYRIGHT="Copyright (c) 2003-2009 University of Illinois at Urbana-Champaign."
+LLVM_COPYRIGHT="Copyright (c) 2003-2010 University of Illinois at Urbana-Champaign."
 
 
 
@@ -1951,6 +1951,33 @@ echo "$as_me: error: Already configured in ${srcdir}" >&2;}
   fi
 fi
 
+
+
+if test -d ${srcdir}/projects/llvm-gcc ; then
+  subdirs="$subdirs projects/llvm-gcc"
+
+fi
+
+if test -d ${srcdir}/projects/test-suite ; then
+  subdirs="$subdirs projects/test-suite"
+
+fi
+
+if test -d ${srcdir}/projects/llvm-test ; then
+  subdirs="$subdirs projects/llvm-test"
+
+fi
+
+if test -d ${srcdir}/projects/poolalloc ; then
+  subdirs="$subdirs projects/poolalloc"
+
+fi
+
+if test -d ${srcdir}/projects/llvm-poolalloc ; then
+  subdirs="$subdirs projects/llvm-poolalloc"
+
+fi
+
 for i in `ls ${srcdir}/projects`
 do
   if test -d ${srcdir}/projects/${i} ; then
@@ -1962,24 +1989,12 @@ do
  ;;
       llvm-stacker) subdirs="$subdirs projects/llvm-stacker"
  ;;
-      # llvm-test is the old name of the test-suite, kept here for backwards
-      # compatibility
-      llvm-test)    subdirs="$subdirs projects/llvm-test"
- ;;
-      test-suite)   subdirs="$subdirs projects/test-suite"
- ;;
       llvm-reopt)   subdirs="$subdirs projects/llvm-reopt"
 ;;
-      llvm-gcc)     subdirs="$subdirs projects/llvm-gcc"
-  ;;
       llvm-java)    subdirs="$subdirs projects/llvm-java"
  ;;
       llvm-tv)      subdirs="$subdirs projects/llvm-tv"
    ;;
-      llvm-poolalloc) subdirs="$subdirs projects/llvm-poolalloc"
- ;;
-      poolalloc)    subdirs="$subdirs projects/poolalloc"
- ;;
       safecode)     subdirs="$subdirs projects/safecode"
  ;;
       llvm-kernel)  subdirs="$subdirs projects/llvm-kernel"
@@ -11136,7 +11151,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<EOF
-#line 11139 "configure"
+#line 11154 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
@@ -20608,7 +20623,7 @@ exec 6>&1
 # report actual input values of CONFIG_FILES etc. instead of their
 # values after options handling.
 ac_log="
-This file was extended by llvm $as_me 2.7svn, which was
+This file was extended by llvm $as_me 2.8svn, which was
 generated by GNU Autoconf 2.60.  Invocation command line was
 
   CONFIG_FILES    = $CONFIG_FILES
@@ -20661,7 +20676,7 @@ Report bugs to <bug-autoconf@gnu.org>."
 _ACEOF
 cat >>$CONFIG_STATUS <<_ACEOF
 ac_cs_version="\\
-llvm config.status 2.7svn
+llvm config.status 2.8svn
 configured by $0, generated by GNU Autoconf 2.60,
   with options \\"`echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`\\"
 
diff --git a/docs/ReleaseNotes.html b/docs/ReleaseNotes.html
index 5bdbceb..53a018a 100644
--- a/docs/ReleaseNotes.html
+++ b/docs/ReleaseNotes.html
@@ -10,6 +10,9 @@
 
 <div class="doc_title">LLVM 2.7 Release Notes</div>
 
+<img align=right src="http://llvm.org/img/DragonSmall.png"
+    width="136" height="136">
+
 <ol>
   <li><a href="#intro">Introduction</a></li>
   <li><a href="#subproj">Sub-project Status Update</a></li>
@@ -48,14 +51,18 @@ href="http://llvm.org/releases/">LLVM releases web site</a>.</p>
 <p>For more information about LLVM, including information about the latest
 release, please check out the <a href="http://llvm.org/">main LLVM
 web site</a>.  If you have questions or comments, the <a
-href="http://mail.cs.uiuc.edu/mailman/listinfo/llvmdev">LLVM Developer's Mailing
-List</a> is a good place to send them.</p>
+href="http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev">LLVM Developer's
+Mailing List</a> is a good place to send them.</p>
 
 <p>Note that if you are reading this file from a Subversion checkout or the
 main LLVM web page, this document applies to the <i>next</i> release, not the
 current one.  To see the release notes for a specific release, please see the
 <a href="http://llvm.org/releases/">releases page</a>.</p>
 
+
+<p>FIXME: llvm.org moved to new server, mention new logo, Ted and Doug new code
+   owners.</p>
+
 </div>
  
 
@@ -66,6 +73,7 @@ Almost dead code.
   llvm/Analysis/PointerTracking.h => Edwin wants this, consider for 2.8.
   ABCD, SCCVN, GEPSplitterPass
   MSIL backend?
+  lib/Transforms/Utils/SSI.cpp  -> ABCD depends on it.
 -->
  
    
@@ -78,8 +86,6 @@ Almost dead code.
   loop dependence analysis
   ELF Writer?  How stable?
   <li>PostRA scheduler improvements, ARM adoption (David Goodwin).</li>
-  2.7 supports the GDB 7.0 jit interfaces for debug info.
-  2.7 eliminates ADT/iterator.h
  -->
 
  <!-- for announcement email:
@@ -88,8 +94,7 @@ Almost dead code.
  compiler_rt
  KLEE web page at klee.llvm.org
  Many new papers added to /pubs/
-   Mention gcc plugin.
-
+ Mention gcc plugin.
    -->
 
 <!-- *********************************************************************** -->
@@ -123,6 +128,7 @@ development.  Here we include updates on these subprojects.
 
 <ul>
 <li>...</li>
+include a link to cxx_compatibility.html
 </ul>
 </div>
 
@@ -139,7 +145,7 @@ href="http://clang.llvm.org/StaticAnalysis.html">automatically finding bugs</a>
 in C and Objective-C programs. The tool performs checks to find
 bugs that occur on a specific path within a program.</p>
 
-<p>In the LLVM 2.7 time-frame, the analyzer core has ...</p>
+<p>In the LLVM 2.7 time-frame, the analyzer core has sprouted legs and...</p>
 
 </div>
 
@@ -192,24 +198,6 @@ License, a "BSD-style" license.</p>
 
 <!--=========================================================================-->
 <div class="doc_subsection">
-<a name="klee">KLEE: Symbolic Execution and Automatic Test Case Generator</a>
-</div>
-
-<div class="doc_text">
-<p>
-The new LLVM <a href="http://klee.llvm.org/">KLEE project</a> is a symbolic
-execution framework for programs in LLVM bitcode form.  KLEE tries to
-symbolically evaluate "all" paths through the application and records state
-transitions that lead to fault states.  This allows it to construct testcases
-that lead to faults and can even be used to verify algorithms.  For more
-details, please see the <a
-href="http://llvm.org/pubs/2008-12-OSDI-KLEE.html">OSDI 2008 paper</a> about
-KLEE.</p>
-
-</div>
-
-<!--=========================================================================-->
-<div class="doc_subsection">
 <a name="dragonegg">DragonEgg: GCC-4.5 as an LLVM frontend</a>
 </div>
 
@@ -257,6 +245,8 @@ The LLVM Machine Code (MC) Toolkit project is ...
 </div>
 
 <div class="doc_text">
+Need update.
+<!--
 <p><a href="http://github.com/evanphx/rubinius">Rubinius</a> is an environment
 for running Ruby code which strives to write as much of the core class
 implementation in Ruby as possible. Combined with a bytecode interpreting VM, it
@@ -266,6 +256,7 @@ remove dynamism from ruby execution and increase performance.</p>
 
 <p>Since LLVM 2.5, Rubinius has made several major leaps forward, implementing
 a counter based JIT, type feedback and speculative method inlining.
+-->
 </p>
 
 </div>
@@ -278,6 +269,8 @@ a counter based JIT, type feedback and speculative method inlining.
 <div class="doc_text">
 
 <p>
+Need update.
+<!--
 <a href="http://macruby.org">MacRuby</a> is an implementation of Ruby on top of
 core Mac OS X technologies, such as the Objective-C common runtime and garbage
 collector and the CoreFoundation framework. It is principally developed by
@@ -287,7 +280,7 @@ Apple and aims at enabling the creation of full-fledged Mac OS X applications.
 <p>
 MacRuby uses LLVM for optimization passes, JIT and AOT compilation of Ruby
 expressions. It also uses zero-cost DWARF exceptions to implement Ruby exception
-handling.</p>
+handling.--> </p>
 
 </div>
 
@@ -308,9 +301,9 @@ built-in list and matrix support (including list and matrix comprehensions) and
 an easy-to-use C interface. The interpreter uses LLVM as a backend to
  JIT-compile Pure programs to fast native code.</p>
 
-<p>Pure versions ??? and later have been tested and are known to work with
-LLVM 2.7 (and continue to work with older LLVM releases >= 2.3 as well).
-</p>
+<p>Pure versions 0.43 and later have been tested and are known to work with
+LLVM 2.7 (and continue to work with older LLVM releases >= 2.5).</p>
+
 </div>
 
 
@@ -321,6 +314,8 @@ LLVM 2.7 (and continue to work with older LLVM releases >= 2.3 as well).
 
 <div class="doc_text">
 <p>
+Need update.
+<!--
 <a href="http://www.dsource.org/projects/ldc">LDC</a> is an implementation of
 the D Programming Language using the LLVM optimizer and code generator.
 The LDC project works great with the LLVM 2.6 release.  General improvements in
@@ -328,7 +323,7 @@ this
 cycle have included new inline asm constraint handling, better debug info
 support, general bug fixes and better x86-64 support.  This has allowed
 some major improvements in LDC, getting it much closer to being as
-fully featured as the original DMD compiler from DigitalMars.
+fully featured as the original DMD compiler from DigitalMars.-->
 </p>
 </div>
 
@@ -342,7 +337,8 @@ fully featured as the original DMD compiler from DigitalMars.
 <a href="http://code.roadsend.com/rphp">Roadsend PHP</a> (rphp) is an open
 source implementation of the PHP programming 
 language that uses LLVM for its optimizer, JIT and static compiler. This is a 
-reimplementation of an earlier project that is now based on LLVM.</p>
+reimplementation of an earlier project that is now based on LLVM.
+</p>
 </div>
 
 <!--=========================================================================-->
@@ -355,7 +351,8 @@ reimplementation of an earlier project that is now based on LLVM.</p>
 <a href="http://code.google.com/p/unladen-swallow/">Unladen Swallow</a> is a
 branch of <a href="http://python.org/">Python</a> intended to be fully
 compatible and significantly faster.  It uses LLVM's optimization passes and JIT
-compiler.</p>
+compiler.
+</p>
 </div>
 
 <!--=========================================================================-->
@@ -365,10 +362,13 @@ compiler.</p>
 
 <div class="doc_text">
 <p>
+Need update.
+<!--
 <a href="http://code.google.com/p/llvm-lua/">LLVM-Lua</a> uses LLVM to add JIT
 and static compiling support to the Lua VM.  Lua bytecode is analyzed to
 remove type checks, then LLVM is used to compile the bytecode down to machine
-code.</p>
+code.-->
+</p>
 </div>
 
 <!--=========================================================================-->
@@ -378,13 +378,15 @@ code.</p>
 
 <div class="doc_text">
 <p>
+Need update.
+<!--
 <a href="http://icedtea.classpath.org/wiki/Main_Page">IcedTea</a> provides a
 harness to build OpenJDK using only free software build tools and to provide
 replacements for the not-yet free parts of OpenJDK.  One of the extensions that
 IcedTea provides is a new JIT compiler named <a
 href="http://icedtea.classpath.org/wiki/ZeroSharkFaq">Shark</a> which uses LLVM
 to provide native code generation without introducing processor-dependent
-code.
+code.-->
 </p>
 </div>
 
@@ -418,6 +420,54 @@ in this section.
 <li>...</li>
 </ul>
 
+Extensible metadata solid.
+
+Debug info improvements: using metadata instead of llvm.dbg global variables.
+This brings several enhancements including improved compile times.
+
+New instruction selector.
+GHC Haskell ABI/ calling conv support.
+Pre-Alpha support for unions in IR.
+New InlineHint and StackAlignment function attributes
+Code generator MC'ized except for debug info and EH.
+New SCEV AA pass: -scev-aa
+Inliner reuses arrays allocas when inlining multiple callers to reduce stack usage.
+MC encoding and disassembler apis.
+Optimal Edge Profiling?
+Instcombine is now a library, has its own IRBuilder to simplify itself.
+New llvm/Support/Regex.h API.  FileCheck now does regex's
+Many subtle pointer invalidation bugs in Callgraph have been fixed and it now uses asserting value handles.
+MC Disassembler (with blog post), MCInstPrinter.  Many X86 backend and AsmPrinter simplifications
+Various tools like llc and opt now read either .ll or .bc files as input.
+Malloc and free instructions got removed.
+compiler-rt support for ARM.
+completely llvm-gcc NEON support.
+Can transcode from GAS to intel syntax with "llvm-mc foo.s -output-asm-variant=1"
+JIT debug information with GDB 7.0
+New CodeGen Level CSE
+CMake can now run tests, what other improvements?
+ARM/Thumb using reg scavenging for stack object address materialization (PEI).
+New SSAUpdater and MachineSSAUpdater classes for unstructured ssa updating,
+  changed jump threading, GVN, etc to use it which simplified them and speed
+  them up.
+Combiner-AA improvements, why not on by default?
+Pre-regalloc tail duplication
+x86 sibcall optimization
+New LSR with full strength reduction mode
+The most awesome sext / zext optimization pass. ?
+
+
+
+CondProp pass removed (functionality merged into jump threading).
+AndersAA got removed (from 2.7 or mainline?)
+PredSimplify, LoopVR, GVNPRE got removed.
+LLVM command line tools now overwrite their output, before they would only do this with -f.
+DOUT removed, use DEBUG(errs() instead.
+Much stuff converted to use raw_ostream instead of std::ostream.
+TargetAsmInfo renamed to MCAsmInfo
+llvm/ADT/iterator.h gone.
+
+
 </div>
 
 <!--=========================================================================-->
@@ -699,13 +749,7 @@ listed by component.  If you run into a problem, please check the <a
 href="http://llvm.org/bugs/">LLVM bug database</a> and submit a bug if
 there isn't already one.</p>
 
-<ul>
-<li>The llvm-gcc bootstrap will fail with some versions of binutils (e.g. 2.15)
-    with a message of "<tt><a href="http://llvm.org/PR5004">Error: can not do 8
-    byte pc-relative relocation</a></tt>" when building C++ code.  We intend to
-    fix this on mainline, but a workaround is to upgrade to binutils 2.17 or
-    later.</li>
-    
+<ul>    
 <li>LLVM will not correctly compile on Solaris and/or OpenSolaris
 using the stock GCC 3.x.x series 'out the box',
 See: <a href="GettingStarted.html#brokengcc">Broken versions of GCC and other tools</a>.
@@ -731,10 +775,11 @@ components, please contact us on the <a
 href="http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev">LLVMdev list</a>.</p>
 
 <ul>
-<li>The MSIL, Alpha, SPU, MIPS, PIC16, Blackfin, MSP430 and SystemZ backends are
-    experimental.</li>
+<li>The MSIL, Alpha, SPU, MIPS, PIC16, Blackfin, MSP430, SystemZ and MicroBlaze
+    backends are experimental.</li>
 <li>The <tt>llc</tt> "<tt>-filetype=asm</tt>" (the default) is the only
-    supported value for this option.  The ELF writer is experimental.</li>
+    supported value for this option.  The MachO writer is experimental, and
+    works much better in mainline SVN.</li>
 </ul>
 
 </div>
@@ -865,7 +910,7 @@ appropriate nops inserted to ensure restartability.</li>
 
 <!-- ======================================================================= -->
 <div class="doc_subsection">
-  <a name="c-fe">Known problems with the llvm-gcc C front-end</a>
+  <a name="c-fe">Known problems with the llvm-gcc C and C++ front-end</a>
 </div>
 
 <div class="doc_text">
@@ -883,24 +928,6 @@ appropriate nops inserted to ensure restartability.</li>
 
 <!-- ======================================================================= -->
 <div class="doc_subsection">
-  <a name="c++-fe">Known problems with the llvm-gcc C++ front-end</a>
-</div>
-
-<div class="doc_text">
-
-<p>The C++ front-end is considered to be fully
-tested and works for a number of non-trivial programs, including LLVM
-itself, Qt, Mozilla, etc.</p>
-
-<ul>
-<li>Exception handling works well on the X86 and PowerPC targets. Currently
-  only Linux and Darwin targets are supported (both 32 and 64 bit).</li>
-</ul>
-
-</div>
-
-<!-- ======================================================================= -->
-<div class="doc_subsection">
   <a name="fortran-fe">Known problems with the llvm-gcc Fortran front-end</a>
 </div>
 
@@ -997,7 +1024,7 @@ lists</a>.</p>
   src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
 
   <a href="http://llvm.org/">LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-03-01 20:29:17 +0100 (Mon, 01 Mar 2010) $
+  Last modified: $Date: 2010-03-19 04:18:05 +0100 (Fri, 19 Mar 2010) $
 </address>
 
 </body>
diff --git a/docs/SourceLevelDebugging.html b/docs/SourceLevelDebugging.html
index 9af8fab..7fffece 100644
--- a/docs/SourceLevelDebugging.html
+++ b/docs/SourceLevelDebugging.html
@@ -237,7 +237,7 @@ height="369">
 <p>LLVM debugging information has been carefully designed to make it possible
    for the optimizer to optimize the program and debugging information without
    necessarily having to know anything about debugging information.  In
-   particular, te use of metadadta avoids duplicated dubgging information from
+   particular, the use of metadata avoids duplicated debugging information from
    the beginning, and the global dead code elimination pass automatically 
    deletes debugging information for a function if it decides to delete the 
    function. </p>
@@ -370,7 +370,7 @@ height="369">
 </pre>
 </div>
 
-<p>These descriptors contain informations for a file. Global variables and top
+<p>These descriptors contain information for a file. Global variables and top
    level functions would be defined using this context.k File descriptors also
    provide context for source line correspondence. </p>
 
@@ -967,7 +967,7 @@ call void @llvm.dbg.declare({ }* %2, metadata !12), !dbg !14
 </pre>
 </div>
 
-<p>Here <tt>!14</tt> indicates that <tt>Z</tt> is declaread at line number 5 and
+<p>Here <tt>!14</tt> indicates that <tt>Z</tt> is declared at line number 5 and
    column number 9 inside of lexical scope <tt>!13</tt>. The lexical scope
    itself resides inside of lexical scope <tt>!1</tt> described above.</p>
 
@@ -1762,7 +1762,7 @@ enum Trees {
 
   <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
   <a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-03-09 01:44:10 +0100 (Tue, 09 Mar 2010) $
+  Last modified: $Date: 2010-03-17 16:01:50 +0100 (Wed, 17 Mar 2010) $
 </address>
 
 </body>
diff --git a/include/llvm/ADT/BitVector.h b/include/llvm/ADT/BitVector.h
index b9f2d83..3a86b0d 100644
--- a/include/llvm/ADT/BitVector.h
+++ b/include/llvm/ADT/BitVector.h
@@ -329,7 +329,8 @@ public:
     Size = RHS.size();
     unsigned RHSWords = NumBitWords(Size);
     if (Size <= Capacity * BITWORD_SIZE) {
-      std::copy(RHS.Bits, &RHS.Bits[RHSWords], Bits);
+      if (Size)
+        std::copy(RHS.Bits, &RHS.Bits[RHSWords], Bits);
       clear_unused_bits();
       return *this;
     }
diff --git a/include/llvm/ADT/EquivalenceClasses.h b/include/llvm/ADT/EquivalenceClasses.h
index f5f3d49..5f89823 100644
--- a/include/llvm/ADT/EquivalenceClasses.h
+++ b/include/llvm/ADT/EquivalenceClasses.h
@@ -16,6 +16,7 @@
 #define LLVM_ADT_EQUIVALENCECLASSES_H
 
 #include "llvm/System/DataTypes.h"
+#include <cassert>
 #include <set>
 
 namespace llvm {
diff --git a/include/llvm/ADT/FoldingSet.h b/include/llvm/ADT/FoldingSet.h
index 81dc469..e8979bb 100644
--- a/include/llvm/ADT/FoldingSet.h
+++ b/include/llvm/ADT/FoldingSet.h
@@ -23,6 +23,7 @@
 namespace llvm {
   class APFloat;
   class APInt;
+  class BumpPtrAllocator;
 
 /// This folding set used for two purposes:
 ///   1. Given information about a node we want to create, look up the unique
@@ -198,6 +199,23 @@ template<typename T> struct FoldingSetTrait {
 };
 
 //===--------------------------------------------------------------------===//
+/// FoldingSetNodeIDRef - This class describes a reference to an interned
+/// FoldingSetNodeID, which can be a useful to store node id data rather
+/// than using plain FoldingSetNodeIDs, since the 32-element SmallVector
+/// is often much larger than necessary, and the possibility of heap
+/// allocation means it requires a non-trivial destructor call.
+class FoldingSetNodeIDRef {
+  unsigned* Data;
+  size_t Size;
+public:
+  FoldingSetNodeIDRef() : Data(0), Size(0) {}
+  FoldingSetNodeIDRef(unsigned *D, size_t S) : Data(D), Size(S) {}
+
+  unsigned *getData() const { return Data; }
+  size_t getSize() const { return Size; }
+};
+
+//===--------------------------------------------------------------------===//
 /// FoldingSetNodeID - This class is used to gather all the unique data bits of
 /// a node.  When all the bits are gathered this class is used to produce a
 /// hash value for the node.
@@ -210,11 +228,8 @@ class FoldingSetNodeID {
 public:
   FoldingSetNodeID() {}
 
-  /// getRawData - Return the ith entry in the Bits data.
-  ///
-  unsigned getRawData(unsigned i) const {
-    return Bits[i];
-  }
+  FoldingSetNodeID(FoldingSetNodeIDRef Ref)
+    : Bits(Ref.getData(), Ref.getData() + Ref.getSize()) {}
 
   /// Add* - Add various data types to Bit data.
   ///
@@ -242,6 +257,11 @@ public:
   /// operator== - Used to compare two nodes to each other.
   ///
   bool operator==(const FoldingSetNodeID &RHS) const;
+
+  /// Intern - Copy this node's data to a memory region allocated from the
+  /// given allocator and return a FoldingSetNodeIDRef describing the
+  /// interned data.
+  FoldingSetNodeIDRef Intern(BumpPtrAllocator &Allocator) const;
 };
 
 // Convenience type to hide the implementation of the folding set.
diff --git a/include/llvm/ADT/SmallVector.h b/include/llvm/ADT/SmallVector.h
index 89acefd..c2afb7e 100644
--- a/include/llvm/ADT/SmallVector.h
+++ b/include/llvm/ADT/SmallVector.h
@@ -57,17 +57,18 @@ protected:
   // something else.  An array of char would work great, but might not be
   // aligned sufficiently.  Instead, we either use GCC extensions, or some
   // number of union instances for the space, which guarantee maximal alignment.
+  struct U {
 #ifdef __GNUC__
-  typedef char U;
-  U FirstEl __attribute__((aligned));
+    char X __attribute__((aligned));
 #else
-  union U {
-    double D;
-    long double LD;
-    long long L;
-    void *P;
-  } FirstEl;
+    union {
+      double D;
+      long double LD;
+      long long L;
+      void *P;
+    } X;
 #endif
+  } FirstEl;
   // Space after 'FirstEl' is clobbered, do not add any instance vars after it.
   
 protected:
diff --git a/include/llvm/Analysis/ScalarEvolution.h b/include/llvm/Analysis/ScalarEvolution.h
index 96d29ba..ab13a9d 100644
--- a/include/llvm/Analysis/ScalarEvolution.h
+++ b/include/llvm/Analysis/ScalarEvolution.h
@@ -49,7 +49,11 @@ namespace llvm {
   /// are opaque objects that the client is not allowed to do much with
   /// directly.
   ///
-  class SCEV : public FastFoldingSetNode {
+  class SCEV : public FoldingSetNode {
+    /// FastID - A reference to an Interned FoldingSetNodeID for this node.
+    /// The ScalarEvolution's BumpPtrAllocator holds the data.
+    FoldingSetNodeIDRef FastID;
+
     // The SCEV baseclass this node corresponds to
     const unsigned short SCEVType;
 
@@ -64,11 +68,14 @@ namespace llvm {
   protected:
     virtual ~SCEV();
   public:
-    explicit SCEV(const FoldingSetNodeID &ID, unsigned SCEVTy) :
-      FastFoldingSetNode(ID), SCEVType(SCEVTy), SubclassData(0) {}
+    explicit SCEV(const FoldingSetNodeIDRef ID, unsigned SCEVTy) :
+      FastID(ID), SCEVType(SCEVTy), SubclassData(0) {}
 
     unsigned getSCEVType() const { return SCEVType; }
 
+    /// Profile - FoldingSet support.
+    void Profile(FoldingSetNodeID& ID) { ID = FastID; }
+
     /// isLoopInvariant - Return true if the value of this SCEV is unchanging in
     /// the specified loop.
     virtual bool isLoopInvariant(const Loop *L) const = 0;
diff --git a/include/llvm/Analysis/ScalarEvolutionExpander.h b/include/llvm/Analysis/ScalarEvolutionExpander.h
index 26dc0c4..dc9b73b 100644
--- a/include/llvm/Analysis/ScalarEvolutionExpander.h
+++ b/include/llvm/Analysis/ScalarEvolutionExpander.h
@@ -79,12 +79,7 @@ namespace llvm {
     /// expandCodeFor - Insert code to directly compute the specified SCEV
     /// expression into the program.  The inserted code is inserted into the
     /// specified block.
-    Value *expandCodeFor(const SCEV *SH, const Type *Ty, Instruction *I) {
-      BasicBlock::iterator IP = I;
-      while (isInsertedInstruction(IP)) ++IP;
-      Builder.SetInsertPoint(IP->getParent(), IP);
-      return expandCodeFor(SH, Ty);
-    }
+    Value *expandCodeFor(const SCEV *SH, const Type *Ty, Instruction *I);
 
     /// setIVIncInsertPos - Set the current IV increment loop and position.
     void setIVIncInsertPos(const Loop *L, Instruction *Pos) {
@@ -109,6 +104,13 @@ namespace llvm {
     /// is useful for late optimization passes.
     void disableCanonicalMode() { CanonicalMode = false; }
 
+    /// clearInsertPoint - Clear the current insertion point. This is useful
+    /// if the instruction that had been serving as the insertion point may
+    /// have been deleted.
+    void clearInsertPoint() {
+      Builder.ClearInsertionPoint();
+    }
+
   private:
     LLVMContext &getContext() const { return SE.getContext(); }
 
diff --git a/include/llvm/Analysis/ScalarEvolutionExpressions.h b/include/llvm/Analysis/ScalarEvolutionExpressions.h
index 0ab3b3f..7424203 100644
--- a/include/llvm/Analysis/ScalarEvolutionExpressions.h
+++ b/include/llvm/Analysis/ScalarEvolutionExpressions.h
@@ -37,7 +37,7 @@ namespace llvm {
     friend class ScalarEvolution;
 
     ConstantInt *V;
-    SCEVConstant(const FoldingSetNodeID &ID, ConstantInt *v) :
+    SCEVConstant(const FoldingSetNodeIDRef ID, ConstantInt *v) :
       SCEV(ID, scConstant), V(v) {}
   public:
     ConstantInt *getValue() const { return V; }
@@ -81,7 +81,7 @@ namespace llvm {
     const SCEV *Op;
     const Type *Ty;
 
-    SCEVCastExpr(const FoldingSetNodeID &ID,
+    SCEVCastExpr(const FoldingSetNodeIDRef ID,
                  unsigned SCEVTy, const SCEV *op, const Type *ty);
 
   public:
@@ -120,7 +120,7 @@ namespace llvm {
   class SCEVTruncateExpr : public SCEVCastExpr {
     friend class ScalarEvolution;
 
-    SCEVTruncateExpr(const FoldingSetNodeID &ID,
+    SCEVTruncateExpr(const FoldingSetNodeIDRef ID,
                      const SCEV *op, const Type *ty);
 
   public:
@@ -140,7 +140,7 @@ namespace llvm {
   class SCEVZeroExtendExpr : public SCEVCastExpr {
     friend class ScalarEvolution;
 
-    SCEVZeroExtendExpr(const FoldingSetNodeID &ID,
+    SCEVZeroExtendExpr(const FoldingSetNodeIDRef ID,
                        const SCEV *op, const Type *ty);
 
   public:
@@ -160,7 +160,7 @@ namespace llvm {
   class SCEVSignExtendExpr : public SCEVCastExpr {
     friend class ScalarEvolution;
 
-    SCEVSignExtendExpr(const FoldingSetNodeID &ID,
+    SCEVSignExtendExpr(const FoldingSetNodeIDRef ID,
                        const SCEV *op, const Type *ty);
 
   public:
@@ -180,25 +180,27 @@ namespace llvm {
   ///
   class SCEVNAryExpr : public SCEV {
   protected:
-    SmallVector<const SCEV *, 8> Operands;
+    // Since SCEVs are immutable, ScalarEvolution allocates operand
+    // arrays with its SCEVAllocator, so this class just needs a simple
+    // pointer rather than a more elaborate vector-like data structure.
+    // This also avoids the need for a non-trivial destructor.
+    const SCEV *const *Operands;
+    size_t NumOperands;
 
-    SCEVNAryExpr(const FoldingSetNodeID &ID,
-                 enum SCEVTypes T, const SmallVectorImpl<const SCEV *> &ops)
-      : SCEV(ID, T), Operands(ops.begin(), ops.end()) {}
+    SCEVNAryExpr(const FoldingSetNodeIDRef ID,
+                 enum SCEVTypes T, const SCEV *const *O, size_t N)
+      : SCEV(ID, T), Operands(O), NumOperands(N) {}
 
   public:
-    unsigned getNumOperands() const { return (unsigned)Operands.size(); }
+    size_t getNumOperands() const { return NumOperands; }
     const SCEV *getOperand(unsigned i) const {
-      assert(i < Operands.size() && "Operand index out of range!");
+      assert(i < NumOperands && "Operand index out of range!");
       return Operands[i];
     }
 
-    const SmallVectorImpl<const SCEV *> &getOperands() const {
-      return Operands;
-    }
-    typedef SmallVectorImpl<const SCEV *>::const_iterator op_iterator;
-    op_iterator op_begin() const { return Operands.begin(); }
-    op_iterator op_end() const { return Operands.end(); }
+    typedef const SCEV *const *op_iterator;
+    op_iterator op_begin() const { return Operands; }
+    op_iterator op_end() const { return Operands + NumOperands; }
 
     virtual bool isLoopInvariant(const Loop *L) const {
       for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
@@ -260,10 +262,9 @@ namespace llvm {
   ///
   class SCEVCommutativeExpr : public SCEVNAryExpr {
   protected:
-    SCEVCommutativeExpr(const FoldingSetNodeID &ID,
-                        enum SCEVTypes T,
-                        const SmallVectorImpl<const SCEV *> &ops)
-      : SCEVNAryExpr(ID, T, ops) {}
+    SCEVCommutativeExpr(const FoldingSetNodeIDRef ID,
+                        enum SCEVTypes T, const SCEV *const *O, size_t N)
+      : SCEVNAryExpr(ID, T, O, N) {}
 
   public:
     virtual const char *getOperationStr() const = 0;
@@ -287,9 +288,9 @@ namespace llvm {
   class SCEVAddExpr : public SCEVCommutativeExpr {
     friend class ScalarEvolution;
 
-    SCEVAddExpr(const FoldingSetNodeID &ID,
-                const SmallVectorImpl<const SCEV *> &ops)
-      : SCEVCommutativeExpr(ID, scAddExpr, ops) {
+    SCEVAddExpr(const FoldingSetNodeIDRef ID,
+                const SCEV *const *O, size_t N)
+      : SCEVCommutativeExpr(ID, scAddExpr, O, N) {
     }
 
   public:
@@ -315,9 +316,9 @@ namespace llvm {
   class SCEVMulExpr : public SCEVCommutativeExpr {
     friend class ScalarEvolution;
 
-    SCEVMulExpr(const FoldingSetNodeID &ID,
-                const SmallVectorImpl<const SCEV *> &ops)
-      : SCEVCommutativeExpr(ID, scMulExpr, ops) {
+    SCEVMulExpr(const FoldingSetNodeIDRef ID,
+                const SCEV *const *O, size_t N)
+      : SCEVCommutativeExpr(ID, scMulExpr, O, N) {
     }
 
   public:
@@ -339,7 +340,7 @@ namespace llvm {
 
     const SCEV *LHS;
     const SCEV *RHS;
-    SCEVUDivExpr(const FoldingSetNodeID &ID, const SCEV *lhs, const SCEV *rhs)
+    SCEVUDivExpr(const FoldingSetNodeIDRef ID, const SCEV *lhs, const SCEV *rhs)
       : SCEV(ID, scUDivExpr), LHS(lhs), RHS(rhs) {}
 
   public:
@@ -389,10 +390,10 @@ namespace llvm {
 
     const Loop *L;
 
-    SCEVAddRecExpr(const FoldingSetNodeID &ID,
-                   const SmallVectorImpl<const SCEV *> &ops, const Loop *l)
-      : SCEVNAryExpr(ID, scAddRecExpr, ops), L(l) {
-      for (size_t i = 0, e = Operands.size(); i != e; ++i)
+    SCEVAddRecExpr(const FoldingSetNodeIDRef ID,
+                   const SCEV *const *O, size_t N, const Loop *l)
+      : SCEVNAryExpr(ID, scAddRecExpr, O, N), L(l) {
+      for (size_t i = 0, e = NumOperands; i != e; ++i)
         assert(Operands[i]->isLoopInvariant(l) &&
                "Operands of AddRec must be loop-invariant!");
     }
@@ -471,9 +472,9 @@ namespace llvm {
   class SCEVSMaxExpr : public SCEVCommutativeExpr {
     friend class ScalarEvolution;
 
-    SCEVSMaxExpr(const FoldingSetNodeID &ID,
-                 const SmallVectorImpl<const SCEV *> &ops)
-      : SCEVCommutativeExpr(ID, scSMaxExpr, ops) {
+    SCEVSMaxExpr(const FoldingSetNodeIDRef ID,
+                 const SCEV *const *O, size_t N)
+      : SCEVCommutativeExpr(ID, scSMaxExpr, O, N) {
       // Max never overflows.
       setHasNoUnsignedWrap(true);
       setHasNoSignedWrap(true);
@@ -496,9 +497,9 @@ namespace llvm {
   class SCEVUMaxExpr : public SCEVCommutativeExpr {
     friend class ScalarEvolution;
 
-    SCEVUMaxExpr(const FoldingSetNodeID &ID,
-                 const SmallVectorImpl<const SCEV *> &ops)
-      : SCEVCommutativeExpr(ID, scUMaxExpr, ops) {
+    SCEVUMaxExpr(const FoldingSetNodeIDRef ID,
+                 const SCEV *const *O, size_t N)
+      : SCEVCommutativeExpr(ID, scUMaxExpr, O, N) {
       // Max never overflows.
       setHasNoUnsignedWrap(true);
       setHasNoSignedWrap(true);
@@ -523,7 +524,7 @@ namespace llvm {
     friend class ScalarEvolution;
 
     Value *V;
-    SCEVUnknown(const FoldingSetNodeID &ID, Value *v) :
+    SCEVUnknown(const FoldingSetNodeIDRef ID, Value *v) :
       SCEV(ID, scUnknown), V(v) {}
 
   public:
diff --git a/include/llvm/CodeGen/MachineJumpTableInfo.h b/include/llvm/CodeGen/MachineJumpTableInfo.h
index b8d04bf..1b6ab2c 100644
--- a/include/llvm/CodeGen/MachineJumpTableInfo.h
+++ b/include/llvm/CodeGen/MachineJumpTableInfo.h
@@ -83,9 +83,9 @@ public:
   /// getEntryAlignment - Return the alignment of each entry in the jump table.
   unsigned getEntryAlignment(const TargetData &TD) const;
   
-  /// getJumpTableIndex - Create a new jump table or return an existing one.
+  /// createJumpTableIndex - Create a new jump table.
   ///
-  unsigned getJumpTableIndex(const std::vector<MachineBasicBlock*> &DestBBs);
+  unsigned createJumpTableIndex(const std::vector<MachineBasicBlock*> &DestBBs);
   
   /// isEmpty - Return true if there are no jump tables.
   ///
diff --git a/include/llvm/Instructions.h b/include/llvm/Instructions.h
index 80b7ca4..b1f1996 100644
--- a/include/llvm/Instructions.h
+++ b/include/llvm/Instructions.h
@@ -2516,6 +2516,11 @@ public:
   const Value *getCalledValue() const { return getOperand(0); }
         Value *getCalledValue()       { return getOperand(0); }
 
+  /// setCalledFunction - Set the function called.
+  void setCalledFunction(Value* Fn) {
+    Op<0>() = Fn;
+  }
+
   // get*Dest - Return the destination basic blocks...
   BasicBlock *getNormalDest() const {
     return cast<BasicBlock>(getOperand(1));
diff --git a/include/llvm/IntrinsicsX86.td b/include/llvm/IntrinsicsX86.td
index 50ee358..67abd95 100644
--- a/include/llvm/IntrinsicsX86.td
+++ b/include/llvm/IntrinsicsX86.td
@@ -892,7 +892,10 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
   def int_x86_sse42_crc32_32         : GCCBuiltin<"__builtin_ia32_crc32si">,
           Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
                     [IntrNoMem]>;
-  def int_x86_sse42_crc32_64         : GCCBuiltin<"__builtin_ia32_crc32di">,
+  def int_x86_sse42_crc64_8         :
+          Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i8_ty],
+                    [IntrNoMem]>;
+  def int_x86_sse42_crc64_64         : GCCBuiltin<"__builtin_ia32_crc32di">,
           Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
                     [IntrNoMem]>;
 }
diff --git a/include/llvm/MC/MCAssembler.h b/include/llvm/MC/MCAssembler.h
index 1d8051f..363c7d9 100644
--- a/include/llvm/MC/MCAssembler.h
+++ b/include/llvm/MC/MCAssembler.h
@@ -24,8 +24,10 @@ class raw_ostream;
 class MCAsmLayout;
 class MCAssembler;
 class MCContext;
+class MCCodeEmitter;
 class MCExpr;
 class MCFragment;
+class MCObjectWriter;
 class MCSection;
 class MCSectionData;
 class MCSymbol;
@@ -35,7 +37,8 @@ class TargetAsmBackend;
 /// MCAsmFixup - Represent a fixed size region of bytes inside some fragment
 /// which needs to be rewritten. This region will either be rewritten by the
 /// assembler or cause a relocation entry to be generated.
-struct MCAsmFixup {
+class MCAsmFixup {
+public:
   /// Offset - The offset inside the fragment which needs to be rewritten.
   uint64_t Offset;
 
@@ -45,14 +48,9 @@ struct MCAsmFixup {
   /// Kind - The fixup kind.
   MCFixupKind Kind;
 
-  /// FixedValue - The value to replace the fix up by.
-  //
-  // FIXME: This should not be here.
-  uint64_t FixedValue;
-
 public:
   MCAsmFixup(uint64_t _Offset, const MCExpr &_Value, MCFixupKind _Kind)
-    : Offset(_Offset), Value(&_Value), Kind(_Kind), FixedValue(0) {}
+    : Offset(_Offset), Value(&_Value), Kind(_Kind) {}
 };
 
 class MCFragment : public ilist_node<MCFragment> {
@@ -590,6 +588,8 @@ public:
   typedef SymbolDataListType::const_iterator const_symbol_iterator;
   typedef SymbolDataListType::iterator symbol_iterator;
 
+  typedef std::vector<IndirectSymbolData>::const_iterator
+    const_indirect_symbol_iterator;
   typedef std::vector<IndirectSymbolData>::iterator indirect_symbol_iterator;
 
 private:
@@ -600,6 +600,8 @@ private:
 
   TargetAsmBackend &Backend;
 
+  MCCodeEmitter &Emitter;
+
   raw_ostream &OS;
 
   iplist<MCSectionData> Sections;
@@ -621,21 +623,6 @@ private:
   unsigned SubsectionsViaSymbols : 1;
 
 private:
-  /// Check whether a fixup can be satisfied, or whether it needs to be relaxed
-  /// (increased in size, in order to hold its value correctly).
-  bool FixupNeedsRelaxation(MCAsmFixup &Fixup, MCDataFragment *DF);
-
-  /// LayoutSection - Assign offsets and sizes to the fragments in the section
-  /// \arg SD, and update the section size. The section file offset should
-  /// already have been computed.
-  void LayoutSection(MCSectionData &SD);
-
-  /// LayoutOnce - Perform one layout iteration and return true if any offsets
-  /// were adjusted.
-  bool LayoutOnce();
-
-  // FIXME: Make protected once we factor out object writer classes.
-public:
   /// Evaluate a fixup to a relocatable expression and the value which should be
   /// placed into the fixup.
   ///
@@ -653,6 +640,44 @@ public:
                      MCAsmFixup &Fixup, MCDataFragment *DF,
                      MCValue &Target, uint64_t &Value) const;
 
+  /// Check whether a fixup can be satisfied, or whether it needs to be relaxed
+  /// (increased in size, in order to hold its value correctly).
+  bool FixupNeedsRelaxation(MCAsmFixup &Fixup, MCDataFragment *DF);
+
+  /// LayoutSection - Assign offsets and sizes to the fragments in the section
+  /// \arg SD, and update the section size. The section file offset should
+  /// already have been computed.
+  void LayoutSection(MCSectionData &SD);
+
+  /// LayoutOnce - Perform one layout iteration and return true if any offsets
+  /// were adjusted.
+  bool LayoutOnce();
+
+public:
+  /// Find the symbol which defines the atom containing given address, inside
+  /// the given section, or null if there is no such symbol.
+  //
+  // FIXME: Eliminate this, it is very slow.
+  const MCSymbolData *getAtomForAddress(const MCSectionData *Section,
+                                        uint64_t Address) const;
+
+  /// Find the symbol which defines the atom containing the given symbol, or
+  /// null if there is no such symbol.
+  //
+  // FIXME: Eliminate this, it is very slow.
+  const MCSymbolData *getAtom(const MCSymbolData *Symbol) const;
+
+  /// Check whether a particular symbol is visible to the linker and is required
+  /// in the symbol table, or whether it can be discarded by the assembler. This
+  /// also effects whether the assembler treats the label as potentially
+  /// defining a separate atom.
+  bool isSymbolLinkerVisible(const MCSymbolData *SD) const;
+
+  /// Emit the section contents using the given object writer.
+  //
+  // FIXME: Should MCAssembler always have a reference to the object writer?
+  void WriteSectionData(const MCSectionData *Section, MCObjectWriter *OW) const;
+
 public:
   /// Construct a new assembler instance.
   ///
@@ -662,13 +687,16 @@ public:
   // concrete and require clients to pass in a target like object. The other
   // option is to make this abstract, and have targets provide concrete
   // implementations as we do with AsmParser.
-  MCAssembler(MCContext &_Context, TargetAsmBackend &_Backend, raw_ostream &OS);
+  MCAssembler(MCContext &_Context, TargetAsmBackend &_Backend,
+              MCCodeEmitter &_Emitter, raw_ostream &OS);
   ~MCAssembler();
 
   MCContext &getContext() const { return Context; }
 
   TargetAsmBackend &getBackend() const { return Backend; }
 
+  MCCodeEmitter &getEmitter() const { return Emitter; }
+
   /// Finish - Do final processing and write the object to the output stream.
   void Finish();
 
@@ -723,10 +751,16 @@ public:
   indirect_symbol_iterator indirect_symbol_begin() {
     return IndirectSymbols.begin();
   }
+  const_indirect_symbol_iterator indirect_symbol_begin() const {
+    return IndirectSymbols.begin();
+  }
 
   indirect_symbol_iterator indirect_symbol_end() {
     return IndirectSymbols.end();
   }
+  const_indirect_symbol_iterator indirect_symbol_end() const {
+    return IndirectSymbols.end();
+  }
 
   size_t indirect_symbol_size() const { return IndirectSymbols.size(); }
 
diff --git a/include/llvm/MC/MCCodeEmitter.h b/include/llvm/MC/MCCodeEmitter.h
index fe1aff4..010a2e5 100644
--- a/include/llvm/MC/MCCodeEmitter.h
+++ b/include/llvm/MC/MCCodeEmitter.h
@@ -22,6 +22,12 @@ template<typename T> class SmallVectorImpl;
 
 /// MCFixupKindInfo - Target independent information on a fixup kind.
 struct MCFixupKindInfo {
+  enum FixupKindFlags {
+    /// Is this fixup kind PCrelative. This is used by the assembler backend to
+    /// evaluate fixup values in a target independent manner when possible.
+    FKF_IsPCRel = (1 << 0)
+  };
+
   /// A target specific name for the fixup kind. The names will be unique for
   /// distinct kinds on any given target.
   const char *Name;
@@ -36,6 +42,9 @@ struct MCFixupKindInfo {
   /// The number of bits written by this fixup. The bits are assumed to be
   /// contiguous.
   unsigned TargetSize;
+
+  /// Flags describing additional information on this fixup kind.
+  unsigned Flags;
 };
 
 /// MCCodeEmitter - Generic instruction encoding interface.
diff --git a/include/llvm/MC/MCContext.h b/include/llvm/MC/MCContext.h
index 85114e3..c5814b3 100644
--- a/include/llvm/MC/MCContext.h
+++ b/include/llvm/MC/MCContext.h
@@ -73,9 +73,10 @@ namespace llvm {
     /// one if it does.
     ///
     /// @param Name - The symbol name, for debugging purposes only, temporary
-    /// symbols do not surive assembly. If non-empty the name must be unique
-    /// across all symbols.
-    MCSymbol *GetOrCreateTemporarySymbol(StringRef Name = "");
+    /// symbols do not surive assembly.
+    MCSymbol *GetOrCreateTemporarySymbol(StringRef Name) {
+      return GetOrCreateSymbol(Name, true);
+    }
     MCSymbol *GetOrCreateTemporarySymbol(const Twine &Name);
 
     /// LookupSymbol - Get the symbol for \p Name, or null.
diff --git a/include/llvm/MC/MCObjectWriter.h b/include/llvm/MC/MCObjectWriter.h
new file mode 100644
index 0000000..d4fab0e
--- /dev/null
+++ b/include/llvm/MC/MCObjectWriter.h
@@ -0,0 +1,162 @@
+//===-- llvm/MC/MCObjectWriter.h - Object File Writer Interface -*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCOBJECTWRITER_H
+#define LLVM_MC_MCOBJECTWRITER_H
+
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/System/DataTypes.h"
+#include <cassert>
+
+namespace llvm {
+class MCAsmFixup;
+class MCAssembler;
+class MCDataFragment;
+class MCValue;
+class raw_ostream;
+
+/// MCObjectWriter - Defines the object file and target independent interfaces
+/// used by the assembler backend to write native file format object files.
+///
+/// The object writer contains a few callbacks used by the assembler to allow
+/// the object writer to modify the assembler data structures at appropriate
+/// points. Once assembly is complete, the object writer is given the
+/// MCAssembler instance, which contains all the symbol and section data which
+/// should be emitted as part of WriteObject().
+///
+/// The object writer also contains a number of helper methods for writing
+/// binary data to the output stream.
+class MCObjectWriter {
+  MCObjectWriter(const MCObjectWriter &); // DO NOT IMPLEMENT
+  void operator=(const MCObjectWriter &); // DO NOT IMPLEMENT
+
+protected:
+  raw_ostream &OS;
+
+  unsigned IsLittleEndian : 1;
+
+protected: // Can only create subclasses.
+  MCObjectWriter(raw_ostream &_OS, bool _IsLittleEndian)
+    : OS(_OS), IsLittleEndian(_IsLittleEndian) {}
+
+public:
+  virtual ~MCObjectWriter();
+
+  bool isLittleEndian() { return IsLittleEndian; }
+
+  raw_ostream &getStream() { return OS; }
+
+  /// @name High-Level API
+  /// @{
+
+  /// Perform any late binding of symbols (for example, to assign symbol indices
+  /// for use when generating relocations).
+  ///
+  /// This routine is called by the assembler after layout and relaxation is
+  /// complete.
+  virtual void ExecutePostLayoutBinding(MCAssembler &Asm) = 0;
+
+  /// Record a relocation entry.
+  ///
+  /// This routine is called by the assembler after layout and relaxation, and
+  /// post layout binding. The implementation is responsible for storing
+  /// information about the relocation so that it can be emitted during
+  /// WriteObject().
+  virtual void RecordRelocation(const MCAssembler &Asm,
+                                const MCDataFragment &Fragment,
+                                const MCAsmFixup &Fixup, MCValue Target,
+                                uint64_t &FixedValue) = 0;
+
+  /// Write the object file.
+  ///
+  /// This routine is called by the assembler after layout and relaxation is
+  /// complete, fixups have been evaluate and applied, and relocations
+  /// generated.
+  virtual void WriteObject(const MCAssembler &Asm) = 0;
+
+  /// @}
+  /// @name Binary Output
+  /// @{
+
+  void Write8(uint8_t Value) {
+    OS << char(Value);
+  }
+
+  void WriteLE16(uint16_t Value) {
+    Write8(uint8_t(Value >> 0));
+    Write8(uint8_t(Value >> 8));
+  }
+
+  void WriteLE32(uint32_t Value) {
+    WriteLE16(uint16_t(Value >> 0));
+    WriteLE16(uint16_t(Value >> 16));
+  }
+
+  void WriteLE64(uint64_t Value) {
+    WriteLE32(uint32_t(Value >> 0));
+    WriteLE32(uint32_t(Value >> 32));
+  }
+
+  void WriteBE16(uint16_t Value) {
+    Write8(uint8_t(Value >> 8));
+    Write8(uint8_t(Value >> 0));
+  }
+
+  void WriteBE32(uint32_t Value) {
+    WriteBE16(uint16_t(Value >> 16));
+    WriteBE16(uint16_t(Value >> 0));
+  }
+
+  void WriteBE64(uint64_t Value) {
+    WriteBE32(uint32_t(Value >> 32));
+    WriteBE32(uint32_t(Value >> 0));
+  }
+
+  void Write16(uint16_t Value) {
+    if (IsLittleEndian)
+      WriteLE16(Value);
+    else
+      WriteBE16(Value);
+  }
+
+  void Write32(uint32_t Value) {
+    if (IsLittleEndian)
+      WriteLE32(Value);
+    else
+      WriteBE32(Value);
+  }
+
+  void Write64(uint64_t Value) {
+    if (IsLittleEndian)
+      WriteLE64(Value);
+    else
+      WriteBE64(Value);
+  }
+
+  void WriteZeros(unsigned N) {
+    const char Zeros[16] = { 0 };
+
+    for (unsigned i = 0, e = N / 16; i != e; ++i)
+      OS << StringRef(Zeros, 16);
+
+    OS << StringRef(Zeros, N % 16);
+  }
+
+  void WriteBytes(StringRef Str, unsigned ZeroFillSize = 0) {
+    OS << Str;
+    if (ZeroFillSize)
+      WriteZeros(ZeroFillSize - Str.size());
+  }
+
+  /// @}
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/include/llvm/MC/MCStreamer.h b/include/llvm/MC/MCStreamer.h
index 47befca..4b088a5 100644
--- a/include/llvm/MC/MCStreamer.h
+++ b/include/llvm/MC/MCStreamer.h
@@ -291,7 +291,8 @@ class TargetAsmBackend;
   /// assembler.
   ///
   /// \param InstPrint - If given, the instruction printer to use. If not given
-  /// the MCInst representation will be printed.
+  /// the MCInst representation will be printed.  This method takes ownership of
+  /// InstPrint.
   ///
   /// \param CE - If given, a code emitter to use to show the instruction
   /// encoding inline with the assembly.
diff --git a/include/llvm/MC/MCSymbol.h b/include/llvm/MC/MCSymbol.h
index e41eb2a..fb96506 100644
--- a/include/llvm/MC/MCSymbol.h
+++ b/include/llvm/MC/MCSymbol.h
@@ -28,8 +28,7 @@ namespace llvm {
   ///
   /// If the symbol is defined/emitted into the current translation unit, the
   /// Section member is set to indicate what section it lives in.  Otherwise, if
-  /// it is a reference to an external entity, it has a null section.  
-  /// 
+  /// it is a reference to an external entity, it has a null section.
   class MCSymbol {
     // Special sentinal value for the absolute pseudo section.
     //
@@ -52,7 +51,7 @@ namespace llvm {
     /// typically does not survive in the .o file's symbol table.  Usually
     /// "Lfoo" or ".foo".
     unsigned IsTemporary : 1;
-    
+
   private:  // MCContext creates and uniques these.
     friend class MCContext;
     MCSymbol(StringRef name, bool isTemporary)
@@ -83,6 +82,12 @@ namespace llvm {
       return Section != 0;
     }
 
+    /// isInSection - Check if this symbol is defined in some section (i.e., it
+    /// is defined but not absolute).
+    bool isInSection() const {
+      return isDefined() && !isAbsolute();
+    }
+
     /// isUndefined - Check if this symbol undefined (i.e., implicitly defined).
     bool isUndefined() const {
       return !isDefined();
@@ -96,7 +101,7 @@ namespace llvm {
     /// getSection - Get the section associated with a defined, non-absolute
     /// symbol.
     const MCSection &getSection() const {
-      assert(!isUndefined() && !isAbsolute() && "Invalid accessor!");
+      assert(isInSection() && "Invalid accessor!");
       return *Section;
     }
 
diff --git a/include/llvm/MC/MCValue.h b/include/llvm/MC/MCValue.h
index 8aa73f3..11b6c2a 100644
--- a/include/llvm/MC/MCValue.h
+++ b/include/llvm/MC/MCValue.h
@@ -19,8 +19,9 @@
 #include <cassert>
 
 namespace llvm {
-class MCSymbol;
 class MCAsmInfo;
+class MCSymbol;
+class MCSymbolRefExpr;
 class raw_ostream;
 
 /// MCValue - This represents an "assembler immediate".  In its most general
@@ -34,13 +35,13 @@ class raw_ostream;
 /// Note that this class must remain a simple POD value class, because we need
 /// it to live in unions etc.
 class MCValue {
-  const MCSymbol *SymA, *SymB;
+  const MCSymbolRefExpr *SymA, *SymB;
   int64_t Cst;
 public:
 
   int64_t getConstant() const { return Cst; }
-  const MCSymbol *getSymA() const { return SymA; }
-  const MCSymbol *getSymB() const { return SymB; }
+  const MCSymbolRefExpr *getSymA() const { return SymA; }
+  const MCSymbolRefExpr *getSymB() const { return SymB; }
 
   /// isAbsolute - Is this an absolute (as opposed to relocatable) value.
   bool isAbsolute() const { return !SymA && !SymB; }
@@ -57,11 +58,11 @@ public:
 
   /// print - Print the value to the stream \arg OS.
   void print(raw_ostream &OS, const MCAsmInfo *MAI) const;
-  
+
   /// dump - Print the value to stderr.
   void dump() const;
 
-  static MCValue get(const MCSymbol *SymA, const MCSymbol *SymB = 0,
+  static MCValue get(const MCSymbolRefExpr *SymA, const MCSymbolRefExpr *SymB=0,
                      int64_t Val = 0) {
     MCValue R;
     assert((!SymB || SymA) && "Invalid relocatable MCValue!");
@@ -70,7 +71,7 @@ public:
     R.SymB = SymB;
     return R;
   }
-  
+
   static MCValue get(int64_t Val) {
     MCValue R;
     R.Cst = Val;
@@ -78,7 +79,7 @@ public:
     R.SymB = 0;
     return R;
   }
-  
+
 };
 
 } // end namespace llvm
diff --git a/include/llvm/MC/MachObjectWriter.h b/include/llvm/MC/MachObjectWriter.h
new file mode 100644
index 0000000..3e3305f
--- /dev/null
+++ b/include/llvm/MC/MachObjectWriter.h
@@ -0,0 +1,43 @@
+//===-- llvm/MC/MachObjectWriter.h - Mach-O File Writer ---------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MACHOBJECTWRITER_H
+#define LLVM_MC_MACHOBJECTWRITER_H
+
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+
+namespace llvm {
+class MCAsmFixup;
+class MCAssembler;
+class MCDataFragment;
+class MCValue;
+class raw_ostream;
+
+class MachObjectWriter : public MCObjectWriter {
+  void *Impl;
+
+public:
+  MachObjectWriter(raw_ostream &OS, bool Is64Bit, bool IsLittleEndian = true);
+  virtual ~MachObjectWriter();
+
+  virtual void ExecutePostLayoutBinding(MCAssembler &Asm);
+
+  virtual void RecordRelocation(const MCAssembler &Asm,
+                                const MCDataFragment &Fragment,
+                                const MCAsmFixup &Fixup, MCValue Target,
+                                uint64_t &FixedValue);
+
+  virtual void WriteObject(const MCAssembler &Asm);
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/include/llvm/Support/Allocator.h b/include/llvm/Support/Allocator.h
index b0ed33d..b1f59dc 100644
--- a/include/llvm/Support/Allocator.h
+++ b/include/llvm/Support/Allocator.h
@@ -15,9 +15,12 @@
 #define LLVM_SUPPORT_ALLOCATOR_H
 
 #include "llvm/Support/AlignOf.h"
+#include "llvm/Support/MathExtras.h"
 #include "llvm/System/DataTypes.h"
+#include <algorithm>
 #include <cassert>
 #include <cstdlib>
+#include <cstddef>
 
 namespace llvm {
 
@@ -175,4 +178,22 @@ public:
 
 }  // end namespace llvm
 
+inline void *operator new(size_t Size, llvm::BumpPtrAllocator &Allocator) {
+  struct S {
+    char c;
+#ifdef __GNUC__
+    char x __attribute__((aligned));
+#else
+    union {
+      double D;
+      long double LD;
+      long long L;
+      void *P;
+    } x;
+#endif
+  };
+  return Allocator.Allocate(Size, std::min((size_t)llvm::NextPowerOf2(Size),
+                                           offsetof(S, x)));
+}
+
 #endif // LLVM_SUPPORT_ALLOCATOR_H
diff --git a/include/llvm/Support/RecyclingAllocator.h b/include/llvm/Support/RecyclingAllocator.h
index 609193f..49f7753 100644
--- a/include/llvm/Support/RecyclingAllocator.h
+++ b/include/llvm/Support/RecyclingAllocator.h
@@ -56,4 +56,11 @@ public:
 
 }
 
+template<class AllocatorType, class T, size_t Size, size_t Align>
+inline void *operator new(size_t,
+                          llvm::RecyclingAllocator<AllocatorType,
+                                                   T, Size, Align> &Allocator) {
+  return Allocator.Allocate();
+}
+
 #endif
diff --git a/include/llvm/Target/Target.td b/include/llvm/Target/Target.td
index 0cffffb..0a7f549 100644
--- a/include/llvm/Target/Target.td
+++ b/include/llvm/Target/Target.td
@@ -389,66 +389,66 @@ class InstrInfo {
 // Standard Pseudo Instructions.
 let isCodeGenOnly = 1 in {
 def PHI : Instruction {
-  let OutOperandList = (ops);
-  let InOperandList = (ops variable_ops);
+  let OutOperandList = (outs);
+  let InOperandList = (ins variable_ops);
   let AsmString = "PHINODE";
   let Namespace = "TargetOpcode";
 }
 def INLINEASM : Instruction {
-  let OutOperandList = (ops);
-  let InOperandList = (ops variable_ops);
+  let OutOperandList = (outs);
+  let InOperandList = (ins variable_ops);
   let AsmString = "";
   let Namespace = "TargetOpcode";
 }
 def DBG_LABEL : Instruction {
-  let OutOperandList = (ops);
-  let InOperandList = (ops i32imm:$id);
+  let OutOperandList = (outs);
+  let InOperandList = (ins i32imm:$id);
   let AsmString = "";
   let Namespace = "TargetOpcode";
   let hasCtrlDep = 1;
   let isNotDuplicable = 1;
 }
 def EH_LABEL : Instruction {
-  let OutOperandList = (ops);
-  let InOperandList = (ops i32imm:$id);
+  let OutOperandList = (outs);
+  let InOperandList = (ins i32imm:$id);
   let AsmString = "";
   let Namespace = "TargetOpcode";
   let hasCtrlDep = 1;
   let isNotDuplicable = 1;
 }
 def GC_LABEL : Instruction {
-  let OutOperandList = (ops);
-  let InOperandList = (ops i32imm:$id);
+  let OutOperandList = (outs);
+  let InOperandList = (ins i32imm:$id);
   let AsmString = "";
   let Namespace = "TargetOpcode";
   let hasCtrlDep = 1;
   let isNotDuplicable = 1;
 }
 def KILL : Instruction {
-  let OutOperandList = (ops);
-  let InOperandList = (ops variable_ops);
+  let OutOperandList = (outs);
+  let InOperandList = (ins variable_ops);
   let AsmString = "";
   let Namespace = "TargetOpcode";
   let neverHasSideEffects = 1;
 }
 def EXTRACT_SUBREG : Instruction {
-  let OutOperandList = (ops unknown:$dst);
-  let InOperandList = (ops unknown:$supersrc, i32imm:$subidx);
+  let OutOperandList = (outs unknown:$dst);
+  let InOperandList = (ins unknown:$supersrc, i32imm:$subidx);
   let AsmString = "";
   let Namespace = "TargetOpcode";
   let neverHasSideEffects = 1;
 }
 def INSERT_SUBREG : Instruction {
-  let OutOperandList = (ops unknown:$dst);
-  let InOperandList = (ops unknown:$supersrc, unknown:$subsrc, i32imm:$subidx);
+  let OutOperandList = (outs unknown:$dst);
+  let InOperandList = (ins unknown:$supersrc, unknown:$subsrc, i32imm:$subidx);
   let AsmString = "";
   let Namespace = "TargetOpcode";
   let neverHasSideEffects = 1;
   let Constraints = "$supersrc = $dst";
 }
 def IMPLICIT_DEF : Instruction {
-  let OutOperandList = (ops unknown:$dst);
-  let InOperandList = (ops);
+  let OutOperandList = (outs unknown:$dst);
+  let InOperandList = (ins);
   let AsmString = "";
   let Namespace = "TargetOpcode";
   let neverHasSideEffects = 1;
@@ -456,23 +456,23 @@ def IMPLICIT_DEF : Instruction {
   let isAsCheapAsAMove = 1;
 }
 def SUBREG_TO_REG : Instruction {
-  let OutOperandList = (ops unknown:$dst);
-  let InOperandList = (ops unknown:$implsrc, unknown:$subsrc, i32imm:$subidx);
+  let OutOperandList = (outs unknown:$dst);
+  let InOperandList = (ins unknown:$implsrc, unknown:$subsrc, i32imm:$subidx);
   let AsmString = "";
   let Namespace = "TargetOpcode";
   let neverHasSideEffects = 1;
 }
 def COPY_TO_REGCLASS : Instruction {
-  let OutOperandList = (ops unknown:$dst);
-  let InOperandList = (ops unknown:$src, i32imm:$regclass);
+  let OutOperandList = (outs unknown:$dst);
+  let InOperandList = (ins unknown:$src, i32imm:$regclass);
   let AsmString = "";
   let Namespace = "TargetOpcode";
   let neverHasSideEffects = 1;
   let isAsCheapAsAMove = 1;
 }
 def DBG_VALUE : Instruction {
-  let OutOperandList = (ops);
-  let InOperandList = (ops variable_ops);
+  let OutOperandList = (outs);
+  let InOperandList = (ins variable_ops);
   let AsmString = "DBG_VALUE";
   let Namespace = "TargetOpcode";
   let isAsCheapAsAMove = 1;
@@ -491,6 +491,11 @@ class AsmParser {
   // class.  Generated AsmParser classes are always prefixed with the target
   // name.
   string AsmParserClassName  = "AsmParser";
+
+  // AsmParserInstCleanup - If non-empty, this is the name of a custom function on the
+  // AsmParser class to call on every matched instruction. This can be used to
+  // perform target specific instruction post-processing.
+  string AsmParserInstCleanup  = "";
  
   // Variant - AsmParsers can be of multiple different variants.  Variants are
   // used to support targets that need to parser multiple formats for the 
diff --git a/include/llvm/Target/TargetAsmBackend.h b/include/llvm/Target/TargetAsmBackend.h
index 35a995f..bb501cc 100644
--- a/include/llvm/Target/TargetAsmBackend.h
+++ b/include/llvm/Target/TargetAsmBackend.h
@@ -10,9 +10,15 @@
 #ifndef LLVM_TARGET_TARGETASMBACKEND_H
 #define LLVM_TARGET_TARGETASMBACKEND_H
 
+#include "llvm/System/DataTypes.h"
+
 namespace llvm {
+class MCAsmFixup;
+class MCDataFragment;
+class MCObjectWriter;
 class MCSection;
 class Target;
+class raw_ostream;
 
 /// TargetAsmBackend - Generic interface to target specific assembler backends.
 class TargetAsmBackend {
@@ -24,11 +30,19 @@ protected: // Can only create subclasses.
   /// TheTarget - The Target that this machine was created for.
   const Target &TheTarget;
 
+  unsigned HasAbsolutizedSet : 1;
+  unsigned HasReliableSymbolDifference : 1;
+  unsigned HasScatteredSymbols : 1;
+
 public:
   virtual ~TargetAsmBackend();
 
   const Target &getTarget() const { return TheTarget; }
 
+  /// createObjectWriter - Create a new MCObjectWriter instance for use by the
+  /// assembler backend to emit the final object file.
+  virtual MCObjectWriter *createObjectWriter(raw_ostream &OS) const = 0;
+
   /// hasAbsolutizedSet - Check whether this target "absolutizes"
   /// assignments. That is, given code like:
   ///   a:
@@ -40,7 +54,21 @@ public:
   /// value of L0 - L1. This distinction is only relevant for platforms that
   /// support scattered symbols, since in the absence of scattered symbols (a -
   /// b) cannot change after assembly.
-  virtual bool hasAbsolutizedSet() const { return false; }
+  bool hasAbsolutizedSet() const { return HasAbsolutizedSet; }
+
+  /// hasReliableSymbolDifference - Check whether this target implements
+  /// accurate relocations for differences between symbols. If not, differences
+  /// between symbols will always be relocatable expressions and any references
+  /// to temporary symbols will be assumed to be in the same atom, unless they
+  /// reside in a different section.
+  ///
+  /// This should always be true (since it results in fewer relocations with no
+  /// loss of functionality), but is currently supported as a way to maintain
+  /// exact object compatibility with Darwin 'as' (on non-x86_64). It should
+  /// eventually should be eliminated. See also \see hasAbsolutizedSet.
+  bool hasReliableSymbolDifference() const {
+    return HasReliableSymbolDifference;
+  }
 
   /// hasScatteredSymbols - Check whether this target supports scattered
   /// symbols. If so, the assembler should assume that atoms can be scattered by
@@ -50,13 +78,23 @@ public:
   ///
   /// Note that the assembler currently does not reason about atoms, instead it
   /// assumes all temporary symbols reside in the "current atom".
-  virtual bool hasScatteredSymbols() const { return false; }
+  bool hasScatteredSymbols() const { return HasScatteredSymbols; }
 
   /// doesSectionRequireSymbols - Check whether the given section requires that
   /// all symbols (even temporaries) have symbol table entries.
   virtual bool doesSectionRequireSymbols(const MCSection &Section) const {
     return false;
   }
+
+  /// isVirtualSection - Check whether the given section is "virtual", that is
+  /// has no actual object file contents.
+  virtual bool isVirtualSection(const MCSection &Section) const = 0;
+
+  /// ApplyFixup - Apply the \arg Value for given \arg Fixup into the provided
+  /// data fragment, at the offset specified by the fixup and following the
+  /// fixup kind as appropriate.
+  virtual void ApplyFixup(const MCAsmFixup &Fixup, MCDataFragment &Fragment,
+                          uint64_t Value) const = 0;
 };
 
 } // End llvm namespace
diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h
index b19c20a..da0f686 100644
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@@ -307,7 +307,7 @@ public:
   /// intrinsic will need to map to a MemIntrinsicNode (touches memory). If
   /// this is the case, it returns true and store the intrinsic
   /// information into the IntrinsicInfo that was passed to the function.
-  typedef struct IntrinsicInfo { 
+  struct IntrinsicInfo { 
     unsigned     opc;         // target opcode
     EVT          memVT;       // memory VT
     const Value* ptrVal;      // value representing memory location
@@ -316,9 +316,9 @@ public:
     bool         vol;         // is volatile?
     bool         readMem;     // reads memory?
     bool         writeMem;    // writes memory?
-  } IntrinisicInfo;
+  };
 
-  virtual bool getTgtMemIntrinsic(IntrinsicInfo& Info,
+  virtual bool getTgtMemIntrinsic(IntrinsicInfo &Info,
                                   CallInst &I, unsigned Intrinsic) {
     return false;
   }
diff --git a/include/llvm/Target/TargetOptions.h b/include/llvm/Target/TargetOptions.h
index b63c2bf..a01a67f 100644
--- a/include/llvm/Target/TargetOptions.h
+++ b/include/llvm/Target/TargetOptions.h
@@ -144,11 +144,6 @@ namespace llvm {
   /// wth earlier copy coalescing.
   extern bool StrongPHIElim;
 
-  /// DisableScheduling - This flag disables instruction scheduling. In
-  /// particular, it assigns an ordering to the SDNodes, which the scheduler
-  /// uses instead of its normal heuristics to perform scheduling.
-  extern bool DisableScheduling;
-
 } // End llvm namespace
 
 #endif
diff --git a/include/llvm/Target/TargetRegistry.h b/include/llvm/Target/TargetRegistry.h
index 4373863..6b6dad8 100644
--- a/include/llvm/Target/TargetRegistry.h
+++ b/include/llvm/Target/TargetRegistry.h
@@ -55,7 +55,7 @@ namespace llvm {
 
     typedef unsigned (*TripleMatchQualityFnTy)(const std::string &TT);
 
-    typedef const MCAsmInfo *(*AsmInfoCtorFnTy)(const Target &T,
+    typedef MCAsmInfo *(*AsmInfoCtorFnTy)(const Target &T,
                                                 StringRef TT);
     typedef TargetMachine *(*TargetMachineCtorTy)(const Target &T,
                                                   const std::string &TT,
@@ -68,7 +68,7 @@ namespace llvm {
     typedef TargetAsmLexer *(*AsmLexerCtorTy)(const Target &T,
                                               const MCAsmInfo &MAI);
     typedef TargetAsmParser *(*AsmParserCtorTy)(const Target &T,MCAsmParser &P);
-    typedef const MCDisassembler *(*MCDisassemblerCtorTy)(const Target &T);
+    typedef MCDisassembler *(*MCDisassemblerCtorTy)(const Target &T);
     typedef MCInstPrinter *(*MCInstPrinterCtorTy)(const Target &T,
                                                   unsigned SyntaxVariant,
                                                   const MCAsmInfo &MAI,
@@ -184,7 +184,7 @@ namespace llvm {
     /// feature set; it should always be provided. Generally this should be
     /// either the target triple from the module, or the target triple of the
     /// host if that does not exist.
-    const MCAsmInfo *createAsmInfo(StringRef Triple) const {
+    MCAsmInfo *createAsmInfo(StringRef Triple) const {
       if (!AsmInfoCtorFn)
         return 0;
       return AsmInfoCtorFn(*this, Triple);
@@ -241,7 +241,7 @@ namespace llvm {
       return AsmPrinterCtorFn(OS, TM, Streamer);
     }
 
-    const MCDisassembler *createMCDisassembler() const {
+    MCDisassembler *createMCDisassembler() const {
       if (!MCDisassemblerCtorFn)
         return 0;
       return MCDisassemblerCtorFn(*this);
@@ -529,7 +529,7 @@ namespace llvm {
       TargetRegistry::RegisterAsmInfo(T, &Allocator);
     }
   private:
-    static const MCAsmInfo *Allocator(const Target &T, StringRef TT) {
+    static MCAsmInfo *Allocator(const Target &T, StringRef TT) {
       return new MCAsmInfoImpl(T, TT);
     }
 
diff --git a/include/llvm/Target/TargetSelectionDAG.td b/include/llvm/Target/TargetSelectionDAG.td
index c718c86..e56d886 100644
--- a/include/llvm/Target/TargetSelectionDAG.td
+++ b/include/llvm/Target/TargetSelectionDAG.td
@@ -219,6 +219,7 @@ def SDNPMayStore    : SDNodeProperty;   // May write to memory, sets 'mayStore'.
 def SDNPMayLoad     : SDNodeProperty;   // May read memory, sets 'mayLoad'.
 def SDNPSideEffect  : SDNodeProperty;   // Sets 'HasUnmodelledSideEffects'.
 def SDNPMemOperand  : SDNodeProperty;   // Touches memory, has assoc MemOperand
+def SDNPVariadic    : SDNodeProperty;   // Node has variable arguments.
 
 //===----------------------------------------------------------------------===//
 // Selection DAG Node definitions.
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
index 96bb027..dda1fba 100644
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -564,21 +564,6 @@ static Constant *SymbolicallyEvaluateGEP(Constant *const *Ops, unsigned NumOps,
 
   unsigned BitWidth =
     TD->getTypeSizeInBits(TD->getIntPtrType(Ptr->getContext()));
-  APInt BasePtr(BitWidth, 0);
-  bool BaseIsInt = true;
-  if (!Ptr->isNullValue()) {
-    // If this is a inttoptr from a constant int, we can fold this as the base,
-    // otherwise we can't.
-    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
-      if (CE->getOpcode() == Instruction::IntToPtr)
-        if (ConstantInt *Base = dyn_cast<ConstantInt>(CE->getOperand(0))) {
-          BasePtr = Base->getValue();
-          BasePtr.zextOrTrunc(BitWidth);
-        }
-    
-    if (BasePtr == 0)
-      BaseIsInt = false;
-  }
 
   // If this is a constant expr gep that is effectively computing an
   // "offsetof", fold it into 'cast int Size to T*' instead of 'gep 0, 0, 12'
@@ -615,7 +600,14 @@ static Constant *SymbolicallyEvaluateGEP(Constant *const *Ops, unsigned NumOps,
 
   // If the base value for this address is a literal integer value, fold the
   // getelementptr to the resulting integer value casted to the pointer type.
-  if (BaseIsInt) {
+  APInt BasePtr(BitWidth, 0);
+  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
+    if (CE->getOpcode() == Instruction::IntToPtr)
+      if (ConstantInt *Base = dyn_cast<ConstantInt>(CE->getOperand(0))) {
+        BasePtr = Base->getValue();
+        BasePtr.zextOrTrunc(BitWidth);
+      }
+  if (Ptr->isNullValue() || BasePtr != 0) {
     Constant *C = ConstantInt::get(Ptr->getContext(), Offset+BasePtr);
     return ConstantExpr::getIntToPtr(C, ResultTy);
   }
@@ -1002,6 +994,8 @@ llvm::canConstantFoldCallTo(const Function *F) {
   case Intrinsic::usub_with_overflow:
   case Intrinsic::sadd_with_overflow:
   case Intrinsic::ssub_with_overflow:
+  case Intrinsic::convert_from_fp16:
+  case Intrinsic::convert_to_fp16:
     return true;
   default:
     return false;
@@ -1082,6 +1076,15 @@ llvm::ConstantFoldCall(Function *F,
   const Type *Ty = F->getReturnType();
   if (NumOperands == 1) {
     if (ConstantFP *Op = dyn_cast<ConstantFP>(Operands[0])) {
+      if (Name == "llvm.convert.to.fp16") {
+        APFloat Val(Op->getValueAPF());
+
+        bool lost = false;
+        Val.convert(APFloat::IEEEhalf, APFloat::rmNearestTiesToEven, &lost);
+
+        return ConstantInt::get(F->getContext(), Val.bitcastToAPInt());
+      }
+
       if (!Ty->isFloatTy() && !Ty->isDoubleTy())
         return 0;
       /// Currently APFloat versions of these functions do not exist, so we use
@@ -1166,6 +1169,20 @@ llvm::ConstantFoldCall(Function *F,
         return ConstantInt::get(Ty, Op->getValue().countTrailingZeros());
       else if (Name.startswith("llvm.ctlz"))
         return ConstantInt::get(Ty, Op->getValue().countLeadingZeros());
+      else if (Name == "llvm.convert.from.fp16") {
+        APFloat Val(Op->getValue());
+
+        bool lost = false;
+        APFloat::opStatus status =
+          Val.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven, &lost);
+
+        // Conversion is always precise.
+        status = status;
+        assert(status == APFloat::opOK && !lost &&
+               "Precision lost during fp16 constfolding");
+
+        return ConstantFP::get(F->getContext(), Val);
+      }
       return 0;
     }
     
diff --git a/lib/Analysis/LoopDependenceAnalysis.cpp b/lib/Analysis/LoopDependenceAnalysis.cpp
index bb4f46d..e101947 100644
--- a/lib/Analysis/LoopDependenceAnalysis.cpp
+++ b/lib/Analysis/LoopDependenceAnalysis.cpp
@@ -119,8 +119,7 @@ bool LoopDependenceAnalysis::findOrInsertDependencePair(Value *A,
   P = Pairs.FindNodeOrInsertPos(id, insertPos);
   if (P) return true;
 
-  P = PairAllocator.Allocate<DependencePair>();
-  new (P) DependencePair(id, A, B);
+  P = new (PairAllocator) DependencePair(id, A, B);
   Pairs.InsertNode(P, insertPos);
   return false;
 }
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index 15f072d..1af271a 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -141,7 +141,7 @@ bool SCEV::isAllOnesValue() const {
 }
 
 SCEVCouldNotCompute::SCEVCouldNotCompute() :
-  SCEV(FoldingSetNodeID(), scCouldNotCompute) {}
+  SCEV(FoldingSetNodeIDRef(), scCouldNotCompute) {}
 
 bool SCEVCouldNotCompute::isLoopInvariant(const Loop *L) const {
   llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
@@ -177,8 +177,7 @@ const SCEV *ScalarEvolution::getConstant(ConstantInt *V) {
   ID.AddPointer(V);
   void *IP = 0;
   if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
-  SCEV *S = SCEVAllocator.Allocate<SCEVConstant>();
-  new (S) SCEVConstant(ID, V);
+  SCEV *S = new (SCEVAllocator) SCEVConstant(ID.Intern(SCEVAllocator), V);
   UniqueSCEVs.InsertNode(S, IP);
   return S;
 }
@@ -199,7 +198,7 @@ void SCEVConstant::print(raw_ostream &OS) const {
   WriteAsOperand(OS, V, false);
 }
 
-SCEVCastExpr::SCEVCastExpr(const FoldingSetNodeID &ID,
+SCEVCastExpr::SCEVCastExpr(const FoldingSetNodeIDRef ID,
                            unsigned SCEVTy, const SCEV *op, const Type *ty)
   : SCEV(ID, SCEVTy), Op(op), Ty(ty) {}
 
@@ -211,7 +210,7 @@ bool SCEVCastExpr::properlyDominates(BasicBlock *BB, DominatorTree *DT) const {
   return Op->properlyDominates(BB, DT);
 }
 
-SCEVTruncateExpr::SCEVTruncateExpr(const FoldingSetNodeID &ID,
+SCEVTruncateExpr::SCEVTruncateExpr(const FoldingSetNodeIDRef ID,
                                    const SCEV *op, const Type *ty)
   : SCEVCastExpr(ID, scTruncate, op, ty) {
   assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) &&
@@ -223,7 +222,7 @@ void SCEVTruncateExpr::print(raw_ostream &OS) const {
   OS << "(trunc " << *Op->getType() << " " << *Op << " to " << *Ty << ")";
 }
 
-SCEVZeroExtendExpr::SCEVZeroExtendExpr(const FoldingSetNodeID &ID,
+SCEVZeroExtendExpr::SCEVZeroExtendExpr(const FoldingSetNodeIDRef ID,
                                        const SCEV *op, const Type *ty)
   : SCEVCastExpr(ID, scZeroExtend, op, ty) {
   assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) &&
@@ -235,7 +234,7 @@ void SCEVZeroExtendExpr::print(raw_ostream &OS) const {
   OS << "(zext " << *Op->getType() << " " << *Op << " to " << *Ty << ")";
 }
 
-SCEVSignExtendExpr::SCEVSignExtendExpr(const FoldingSetNodeID &ID,
+SCEVSignExtendExpr::SCEVSignExtendExpr(const FoldingSetNodeIDRef ID,
                                        const SCEV *op, const Type *ty)
   : SCEVCastExpr(ID, scSignExtend, op, ty) {
   assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) &&
@@ -248,10 +247,10 @@ void SCEVSignExtendExpr::print(raw_ostream &OS) const {
 }
 
 void SCEVCommutativeExpr::print(raw_ostream &OS) const {
-  assert(Operands.size() > 1 && "This plus expr shouldn't exist!");
+  assert(NumOperands > 1 && "This plus expr shouldn't exist!");
   const char *OpStr = getOperationStr();
   OS << "(" << *Operands[0];
-  for (unsigned i = 1, e = Operands.size(); i != e; ++i)
+  for (unsigned i = 1, e = NumOperands; i != e; ++i)
     OS << OpStr << *Operands[i];
   OS << ")";
 }
@@ -329,7 +328,7 @@ SCEVAddRecExpr::properlyDominates(BasicBlock *BB, DominatorTree *DT) const {
 
 void SCEVAddRecExpr::print(raw_ostream &OS) const {
   OS << "{" << *Operands[0];
-  for (unsigned i = 1, e = Operands.size(); i != e; ++i)
+  for (unsigned i = 1, e = NumOperands; i != e; ++i)
     OS << ",+," << *Operands[i];
   OS << "}<";
   WriteAsOperand(OS, L->getHeader(), /*PrintType=*/false);
@@ -846,8 +845,8 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op,
   // The cast wasn't folded; create an explicit cast node.
   // Recompute the insert position, as it may have been invalidated.
   if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
-  SCEV *S = SCEVAllocator.Allocate<SCEVTruncateExpr>();
-  new (S) SCEVTruncateExpr(ID, Op, Ty);
+  SCEV *S = new (SCEVAllocator) SCEVTruncateExpr(ID.Intern(SCEVAllocator),
+                                                 Op, Ty);
   UniqueSCEVs.InsertNode(S, IP);
   return S;
 }
@@ -981,8 +980,8 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
   // The cast wasn't folded; create an explicit cast node.
   // Recompute the insert position, as it may have been invalidated.
   if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
-  SCEV *S = SCEVAllocator.Allocate<SCEVZeroExtendExpr>();
-  new (S) SCEVZeroExtendExpr(ID, Op, Ty);
+  SCEV *S = new (SCEVAllocator) SCEVZeroExtendExpr(ID.Intern(SCEVAllocator),
+                                                   Op, Ty);
   UniqueSCEVs.InsertNode(S, IP);
   return S;
 }
@@ -1116,8 +1115,8 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
   // The cast wasn't folded; create an explicit cast node.
   // Recompute the insert position, as it may have been invalidated.
   if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
-  SCEV *S = SCEVAllocator.Allocate<SCEVSignExtendExpr>();
-  new (S) SCEVSignExtendExpr(ID, Op, Ty);
+  SCEV *S = new (SCEVAllocator) SCEVSignExtendExpr(ID.Intern(SCEVAllocator),
+                                                   Op, Ty);
   UniqueSCEVs.InsertNode(S, IP);
   return S;
 }
@@ -1202,23 +1201,23 @@ static bool
 CollectAddOperandsWithScales(DenseMap<const SCEV *, APInt> &M,
                              SmallVector<const SCEV *, 8> &NewOps,
                              APInt &AccumulatedConstant,
-                             const SmallVectorImpl<const SCEV *> &Ops,
+                             const SCEV *const *Ops, size_t NumOperands,
                              const APInt &Scale,
                              ScalarEvolution &SE) {
   bool Interesting = false;
 
   // Iterate over the add operands.
-  for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+  for (unsigned i = 0, e = NumOperands; i != e; ++i) {
     const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Ops[i]);
     if (Mul && isa<SCEVConstant>(Mul->getOperand(0))) {
       APInt NewScale =
         Scale * cast<SCEVConstant>(Mul->getOperand(0))->getValue()->getValue();
       if (Mul->getNumOperands() == 2 && isa<SCEVAddExpr>(Mul->getOperand(1))) {
         // A multiplication of a constant with another add; recurse.
+        const SCEVAddExpr *Add = cast<SCEVAddExpr>(Mul->getOperand(1));
         Interesting |=
           CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant,
-                                       cast<SCEVAddExpr>(Mul->getOperand(1))
-                                         ->getOperands(),
+                                       Add->op_begin(), Add->getNumOperands(),
                                        NewScale, SE);
       } else {
         // A multiplication of a constant with some other value. Update
@@ -1427,7 +1426,8 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
     SmallVector<const SCEV *, 8> NewOps;
     APInt AccumulatedConstant(BitWidth, 0);
     if (CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant,
-                                     Ops, APInt(BitWidth, 1), *this)) {
+                                     Ops.data(), Ops.size(),
+                                     APInt(BitWidth, 1), *this)) {
       // Some interesting folding opportunity is present, so its worthwhile to
       // re-generate the operands list. Group the operands by constant scale,
       // to avoid multiplying by the same constant scale multiple times.
@@ -1611,8 +1611,10 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
   SCEVAddExpr *S =
     static_cast<SCEVAddExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
   if (!S) {
-    S = SCEVAllocator.Allocate<SCEVAddExpr>();
-    new (S) SCEVAddExpr(ID, Ops);
+    const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
+    std::uninitialized_copy(Ops.begin(), Ops.end(), O);
+    S = new (SCEVAllocator) SCEVAddExpr(ID.Intern(SCEVAllocator),
+                                        O, Ops.size());
     UniqueSCEVs.InsertNode(S, IP);
   }
   if (HasNUW) S->setHasNoUnsignedWrap(true);
@@ -1819,8 +1821,10 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
   SCEVMulExpr *S =
     static_cast<SCEVMulExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
   if (!S) {
-    S = SCEVAllocator.Allocate<SCEVMulExpr>();
-    new (S) SCEVMulExpr(ID, Ops);
+    const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
+    std::uninitialized_copy(Ops.begin(), Ops.end(), O);
+    S = new (SCEVAllocator) SCEVMulExpr(ID.Intern(SCEVAllocator),
+                                        O, Ops.size());
     UniqueSCEVs.InsertNode(S, IP);
   }
   if (HasNUW) S->setHasNoUnsignedWrap(true);
@@ -1880,9 +1884,7 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
           const SCEV *Op = M->getOperand(i);
           const SCEV *Div = getUDivExpr(Op, RHSC);
           if (!isa<SCEVUDivExpr>(Div) && getMulExpr(Div, RHSC) == Op) {
-            const SmallVectorImpl<const SCEV *> &MOperands = M->getOperands();
-            Operands = SmallVector<const SCEV *, 4>(MOperands.begin(),
-                                                  MOperands.end());
+            Operands = SmallVector<const SCEV *, 4>(M->op_begin(), M->op_end());
             Operands[i] = Div;
             return getMulExpr(Operands);
           }
@@ -1921,8 +1923,8 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
   ID.AddPointer(RHS);
   void *IP = 0;
   if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
-  SCEV *S = SCEVAllocator.Allocate<SCEVUDivExpr>();
-  new (S) SCEVUDivExpr(ID, LHS, RHS);
+  SCEV *S = new (SCEVAllocator) SCEVUDivExpr(ID.Intern(SCEVAllocator),
+                                             LHS, RHS);
   UniqueSCEVs.InsertNode(S, IP);
   return S;
 }
@@ -2030,8 +2032,10 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
   SCEVAddRecExpr *S =
     static_cast<SCEVAddRecExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
   if (!S) {
-    S = SCEVAllocator.Allocate<SCEVAddRecExpr>();
-    new (S) SCEVAddRecExpr(ID, Operands, L);
+    const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Operands.size());
+    std::uninitialized_copy(Operands.begin(), Operands.end(), O);
+    S = new (SCEVAllocator) SCEVAddRecExpr(ID.Intern(SCEVAllocator),
+                                           O, Operands.size(), L);
     UniqueSCEVs.InsertNode(S, IP);
   }
   if (HasNUW) S->setHasNoUnsignedWrap(true);
@@ -2130,8 +2134,10 @@ ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
     ID.AddPointer(Ops[i]);
   void *IP = 0;
   if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
-  SCEV *S = SCEVAllocator.Allocate<SCEVSMaxExpr>();
-  new (S) SCEVSMaxExpr(ID, Ops);
+  const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
+  std::uninitialized_copy(Ops.begin(), Ops.end(), O);
+  SCEV *S = new (SCEVAllocator) SCEVSMaxExpr(ID.Intern(SCEVAllocator),
+                                             O, Ops.size());
   UniqueSCEVs.InsertNode(S, IP);
   return S;
 }
@@ -2227,8 +2233,10 @@ ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
     ID.AddPointer(Ops[i]);
   void *IP = 0;
   if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
-  SCEV *S = SCEVAllocator.Allocate<SCEVUMaxExpr>();
-  new (S) SCEVUMaxExpr(ID, Ops);
+  const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
+  std::uninitialized_copy(Ops.begin(), Ops.end(), O);
+  SCEV *S = new (SCEVAllocator) SCEVUMaxExpr(ID.Intern(SCEVAllocator),
+                                             O, Ops.size());
   UniqueSCEVs.InsertNode(S, IP);
   return S;
 }
@@ -2290,8 +2298,7 @@ const SCEV *ScalarEvolution::getUnknown(Value *V) {
   ID.AddPointer(V);
   void *IP = 0;
   if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
-  SCEV *S = SCEVAllocator.Allocate<SCEVUnknown>();
-  new (S) SCEVUnknown(ID, V);
+  SCEV *S = new (SCEVAllocator) SCEVUnknown(ID.Intern(SCEVAllocator), V);
   UniqueSCEVs.InsertNode(S, IP);
   return S;
 }
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index 3c2cbfb..138cdc6 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -232,9 +232,7 @@ static bool FactorOutConstant(const SCEV *&S,
       const SCEVConstant *FC = cast<SCEVConstant>(Factor);
       if (const SCEVConstant *C = dyn_cast<SCEVConstant>(M->getOperand(0)))
         if (!C->getValue()->getValue().srem(FC->getValue()->getValue())) {
-          const SmallVectorImpl<const SCEV *> &MOperands = M->getOperands();
-          SmallVector<const SCEV *, 4> NewMulOps(MOperands.begin(),
-                                                 MOperands.end());
+          SmallVector<const SCEV *, 4> NewMulOps(M->op_begin(), M->op_end());
           NewMulOps[0] =
             SE.getConstant(C->getValue()->getValue().sdiv(
                                                    FC->getValue()->getValue()));
@@ -249,9 +247,7 @@ static bool FactorOutConstant(const SCEV *&S,
         const SCEV *Remainder = SE.getIntegerSCEV(0, SOp->getType());
         if (FactorOutConstant(SOp, Remainder, Factor, SE, TD) &&
             Remainder->isZero()) {
-          const SmallVectorImpl<const SCEV *> &MOperands = M->getOperands();
-          SmallVector<const SCEV *, 4> NewMulOps(MOperands.begin(),
-                                                 MOperands.end());
+          SmallVector<const SCEV *, 4> NewMulOps(M->op_begin(), M->op_end());
           NewMulOps[i] = SOp;
           S = SE.getMulExpr(NewMulOps);
           return true;
@@ -297,13 +293,11 @@ static void SimplifyAddOperands(SmallVectorImpl<const SCEV *> &Ops,
                     SE.getAddExpr(NoAddRecs);
   // If it returned an add, use the operands. Otherwise it simplified
   // the sum into a single value, so just use that.
+  Ops.clear();
   if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Sum))
-    Ops = Add->getOperands();
-  else {
-    Ops.clear();
-    if (!Sum->isZero())
-      Ops.push_back(Sum);
-  }
+    Ops.insert(Ops.end(), Add->op_begin(), Add->op_end());
+  else if (!Sum->isZero())
+    Ops.push_back(Sum);
   // Then append the addrecs.
   Ops.insert(Ops.end(), AddRecs.begin(), AddRecs.end());
 }
@@ -1060,10 +1054,9 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
   if (CanonicalIV &&
       SE.getTypeSizeInBits(CanonicalIV->getType()) >
       SE.getTypeSizeInBits(Ty)) {
-    const SmallVectorImpl<const SCEV *> &Ops = S->getOperands();
-    SmallVector<const SCEV *, 4> NewOps(Ops.size());
-    for (unsigned i = 0, e = Ops.size(); i != e; ++i)
-      NewOps[i] = SE.getAnyExtendExpr(Ops[i], CanonicalIV->getType());
+    SmallVector<const SCEV *, 4> NewOps(S->getNumOperands());
+    for (unsigned i = 0, e = S->getNumOperands(); i != e; ++i)
+      NewOps[i] = SE.getAnyExtendExpr(S->op_begin()[i], CanonicalIV->getType());
     Value *V = expand(SE.getAddRecExpr(NewOps, S->getLoop()));
     BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
     BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
@@ -1078,8 +1071,7 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
 
   // {X,+,F} --> X + {0,+,F}
   if (!S->getStart()->isZero()) {
-    const SmallVectorImpl<const SCEV *> &SOperands = S->getOperands();
-    SmallVector<const SCEV *, 4> NewOps(SOperands.begin(), SOperands.end());
+    SmallVector<const SCEV *, 4> NewOps(S->op_begin(), S->op_end());
     NewOps[0] = SE.getIntegerSCEV(0, Ty);
     const SCEV *Rest = SE.getAddRecExpr(NewOps, L);
 
@@ -1248,6 +1240,15 @@ Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) {
   return LHS;
 }
 
+Value *SCEVExpander::expandCodeFor(const SCEV *SH, const Type *Ty,
+                                   Instruction *I) {
+  BasicBlock::iterator IP = I;
+  while (isInsertedInstruction(IP) || isa<DbgInfoIntrinsic>(IP))
+    ++IP;
+  Builder.SetInsertPoint(IP->getParent(), IP);
+  return expandCodeFor(SH, Ty);
+}
+
 Value *SCEVExpander::expandCodeFor(const SCEV *SH, const Type *Ty) {
   // Expand the code for this SCEV.
   Value *V = expand(SH);
@@ -1286,9 +1287,7 @@ Value *SCEVExpander::expand(const SCEV *S) {
       // there) so that it is guaranteed to dominate any user inside the loop.
       if (L && S->hasComputableLoopEvolution(L) && L != PostIncLoop)
         InsertPt = L->getHeader()->getFirstNonPHI();
-      while (isa<DbgInfoIntrinsic>(InsertPt))
-        InsertPt = llvm::next(BasicBlock::iterator(InsertPt));
-      while (isInsertedInstruction(InsertPt))
+      while (isInsertedInstruction(InsertPt) || isa<DbgInfoIntrinsic>(InsertPt))
         InsertPt = llvm::next(BasicBlock::iterator(InsertPt));
       break;
     }
@@ -1324,7 +1323,8 @@ void SCEVExpander::rememberInstruction(Value *I) {
   // subsequently inserted code will be dominated.
   if (Builder.GetInsertPoint() == I) {
     BasicBlock::iterator It = cast<Instruction>(I);
-    do { ++It; } while (isInsertedInstruction(It));
+    do { ++It; } while (isInsertedInstruction(It) ||
+                        isa<DbgInfoIntrinsic>(It));
     Builder.SetInsertPoint(Builder.GetInsertBlock(), It);
   }
 }
@@ -1332,7 +1332,7 @@ void SCEVExpander::rememberInstruction(Value *I) {
 void SCEVExpander::restoreInsertPoint(BasicBlock *BB, BasicBlock::iterator I) {
   // If we acquired more instructions since the old insert point was saved,
   // advance past them.
-  while (isInsertedInstruction(I)) ++I;
+  while (isInsertedInstruction(I) || isa<DbgInfoIntrinsic>(I)) ++I;
 
   Builder.SetInsertPoint(BB, I);
 }
diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp
index a328837..b9453c9 100644
--- a/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -293,6 +293,8 @@ void BitcodeReaderValueList::ResolveConstantForwardRefs() {
       } else if (ConstantStruct *UserCS = dyn_cast<ConstantStruct>(UserC)) {
         NewC = ConstantStruct::get(Context, &NewOps[0], NewOps.size(),
                                          UserCS->getType()->isPacked());
+      } else if (ConstantUnion *UserCU = dyn_cast<ConstantUnion>(UserC)) {
+        NewC = ConstantUnion::get(UserCU->getType(), NewOps[0]);
       } else if (isa<ConstantVector>(UserC)) {
         NewC = ConstantVector::get(&NewOps[0], NewOps.size());
       } else {
@@ -1015,6 +1017,11 @@ bool BitcodeReader::ParseConstants() {
           Elts.push_back(ValueList.getConstantFwdRef(Record[i],
                                                      STy->getElementType(i)));
         V = ConstantStruct::get(STy, Elts);
+      } else if (const UnionType *UnTy = dyn_cast<UnionType>(CurTy)) {
+        uint64_t Index = Record[0];
+        Constant *Val = ValueList.getConstantFwdRef(Record[1],
+                                        UnTy->getElementType(Index));
+        V = ConstantUnion::get(UnTy, Val);
       } else if (const ArrayType *ATy = dyn_cast<ArrayType>(CurTy)) {
         const Type *EltTy = ATy->getElementType();
         for (unsigned i = 0; i != Size; ++i)
diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp
index 82e73b5..3ab2726 100644
--- a/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -808,11 +808,25 @@ static void WriteConstants(unsigned FirstVal, unsigned LastVal,
       else if (isCStr7)
         AbbrevToUse = CString7Abbrev;
     } else if (isa<ConstantArray>(C) || isa<ConstantStruct>(V) ||
-               isa<ConstantUnion>(C) || isa<ConstantVector>(V)) {
+               isa<ConstantVector>(V)) {
       Code = bitc::CST_CODE_AGGREGATE;
       for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i)
         Record.push_back(VE.getValueID(C->getOperand(i)));
       AbbrevToUse = AggregateAbbrev;
+    } else if (isa<ConstantUnion>(C)) {
+      Code = bitc::CST_CODE_AGGREGATE;
+
+      // Unions only have one entry but we must send type along with it.
+      const Type *EntryKind = C->getOperand(0)->getType();
+
+      const UnionType *UnTy = cast<UnionType>(C->getType());
+      int UnionIndex = UnTy->getElementTypeIndex(EntryKind);
+      assert(UnionIndex != -1 && "Constant union contains invalid entry");
+
+      Record.push_back(UnionIndex);
+      Record.push_back(VE.getValueID(C->getOperand(0)));
+
+      AbbrevToUse = AggregateAbbrev;
     } else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
       switch (CE->getOpcode()) {
       default:
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 2636e2c..1d4f7f7 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -1138,6 +1138,21 @@ static void EmitGlobalConstantStruct(const ConstantStruct *CS,
          "Layout of constant struct may be incorrect!");
 }
 
+static void EmitGlobalConstantUnion(const ConstantUnion *CU, 
+                                    unsigned AddrSpace, AsmPrinter &AP) {
+  const TargetData *TD = AP.TM.getTargetData();
+  unsigned Size = TD->getTypeAllocSize(CU->getType());
+
+  const Constant *Contents = CU->getOperand(0);
+  unsigned FilledSize = TD->getTypeAllocSize(Contents->getType());
+    
+  // Print the actually filled part
+  AP.EmitGlobalConstant(Contents, AddrSpace);
+
+  // And pad with enough zeroes
+  AP.OutStreamer.EmitZeros(Size-FilledSize, AddrSpace);
+}
+
 static void EmitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace,
                                  AsmPrinter &AP) {
   // FP Constants are printed as integer constants to avoid losing
@@ -1257,9 +1272,6 @@ void AsmPrinter::EmitGlobalConstant(const Constant *CV, unsigned AddrSpace) {
 
   if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV))
     return EmitGlobalConstantFP(CFP, AddrSpace, *this);
-  
-  if (const ConstantVector *V = dyn_cast<ConstantVector>(CV))
-    return EmitGlobalConstantVector(V, AddrSpace, *this);
 
   if (isa<ConstantPointerNull>(CV)) {
     unsigned Size = TM.getTargetData()->getTypeAllocSize(CV->getType());
@@ -1267,6 +1279,12 @@ void AsmPrinter::EmitGlobalConstant(const Constant *CV, unsigned AddrSpace) {
     return;
   }
   
+  if (const ConstantUnion *CVU = dyn_cast<ConstantUnion>(CV))
+    return EmitGlobalConstantUnion(CVU, AddrSpace, *this);
+  
+  if (const ConstantVector *V = dyn_cast<ConstantVector>(CV))
+    return EmitGlobalConstantVector(V, AddrSpace, *this);
+  
   // Otherwise, it must be a ConstantExpr.  Lower it to an MCExpr, then emit it
   // thread the streamer with EmitValue.
   OutStreamer.EmitValue(LowerConstant(CV, *this),
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 866f457..7153fe2 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -2531,8 +2531,8 @@ void DwarfDebug::emitDebugInfo() {
   Asm->OutStreamer.AddComment("DWARF version number");
   Asm->EmitInt16(dwarf::DWARF_VERSION);
   Asm->OutStreamer.AddComment("Offset Into Abbrev. Section");
-  EmitSectionOffset(getTempLabel("abbrev_begin"),getTempLabel("section_abbrev"),
-                    true, false);
+  EmitSectionOffset(getTempLabel("abbrev_begin"),getTempLabel("section_abbrev"), 
+                    true);
   Asm->OutStreamer.AddComment("Address Size (in bytes)");
   Asm->EmitInt8(TD->getPointerSize());
 
@@ -2842,8 +2842,8 @@ emitFunctionDebugFrame(const FunctionDebugFrameInfo &DebugFrameInfo) {
   Asm->OutStreamer.EmitLabel(DebugFrameBegin);
 
   Asm->OutStreamer.AddComment("FDE CIE offset");
-  EmitSectionOffset(getTempLabel("debug_frame_common"),
-                    getTempLabel("section_debug_frame"), true, false);
+  EmitSectionOffset(getTempLabel("debug_frame_common"), 
+                    getTempLabel("section_debug_frame"), true);
 
   Asm->OutStreamer.AddComment("FDE initial location");
   MCSymbol *FuncBeginSym = getDWLabel("func_begin", DebugFrameInfo.Number);
@@ -2878,8 +2878,7 @@ void DwarfDebug::emitDebugPubNames() {
 
   Asm->OutStreamer.AddComment("Offset of Compilation Unit Info");
   EmitSectionOffset(getDWLabel("info_begin", ModuleCU->getID()), 
-                    getTempLabel("section_info"),
-                    true, false);
+                    getTempLabel("section_info"), true);
 
   Asm->OutStreamer.AddComment("Compilation Unit Length");
   EmitDifference(getDWLabel("info_end", ModuleCU->getID()),
@@ -2920,7 +2919,7 @@ void DwarfDebug::emitDebugPubTypes() {
 
   Asm->OutStreamer.AddComment("Offset of Compilation ModuleCU Info");
   EmitSectionOffset(getDWLabel("info_begin", ModuleCU->getID()),
-                    getTempLabel("section_info"), true, false);
+                    getTempLabel("section_info"), true);
 
   Asm->OutStreamer.AddComment("Compilation ModuleCU Length");
   EmitDifference(getDWLabel("info_end", ModuleCU->getID()),
@@ -3068,8 +3067,8 @@ void DwarfDebug::emitDebugInlineInfo() {
                         getTempLabel("section_str"), true);
 
     Asm->OutStreamer.AddComment("Function name");
-    EmitSectionOffset(getStringPoolEntry(Name), getTempLabel("section_str"),
-                      false, true);
+    EmitSectionOffset(getStringPoolEntry(Name), getTempLabel("section_str"), 
+                      true);
     EmitULEB128(Labels.size(), "Inline count");
 
     for (SmallVector<InlineInfoLabels, 4>::iterator LI = Labels.begin(),
diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp
index 13ae43d..151e9cd 100644
--- a/lib/CodeGen/BranchFolding.cpp
+++ b/lib/CodeGen/BranchFolding.cpp
@@ -192,7 +192,7 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF,
     MadeChange |= MadeChangeThisIteration;
   }
 
-  // See if any jump tables have become mergable or dead as the code generator
+  // See if any jump tables have become dead as the code generator
   // did its thing.
   MachineJumpTableInfo *JTI = MF.getJumpTableInfo();
   if (JTI == 0) {
@@ -200,27 +200,8 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF,
     return MadeChange;
   }
   
-  const std::vector<MachineJumpTableEntry> &JTs = JTI->getJumpTables();
-  // Figure out how these jump tables should be merged.
-  std::vector<unsigned> JTMapping;
-  JTMapping.reserve(JTs.size());
-
-  // We always keep the 0th jump table.
-  JTMapping.push_back(0);
-
-  // Scan the jump tables, seeing if there are any duplicates.  Note that this
-  // is N^2, which should be fixed someday.
-  for (unsigned i = 1, e = JTs.size(); i != e; ++i) {
-    if (JTs[i].MBBs.empty())
-      JTMapping.push_back(i);
-    else
-      JTMapping.push_back(JTI->getJumpTableIndex(JTs[i].MBBs));
-  }
-
-  // If a jump table was merge with another one, walk the function rewriting
-  // references to jump tables to reference the new JT ID's.  Keep track of
-  // whether we see a jump table idx, if not, we can delete the JT.
-  BitVector JTIsLive(JTs.size());
+  // Walk the function to find jump tables that are live.
+  BitVector JTIsLive(JTI->getJumpTables().size());
   for (MachineFunction::iterator BB = MF.begin(), E = MF.end();
        BB != E; ++BB) {
     for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end();
@@ -228,17 +209,14 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF,
       for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) {
         MachineOperand &Op = I->getOperand(op);
         if (!Op.isJTI()) continue;
-        unsigned NewIdx = JTMapping[Op.getIndex()];
-        Op.setIndex(NewIdx);
 
         // Remember that this JT is live.
-        JTIsLive.set(NewIdx);
+        JTIsLive.set(Op.getIndex());
       }
   }
 
-  // Finally, remove dead jump tables.  This happens either because the
-  // indirect jump was unreachable (and thus deleted) or because the jump
-  // table was merged with some other one.
+  // Finally, remove dead jump tables.  This happens when the
+  // indirect jump was unreachable (and thus deleted).
   for (unsigned i = 0, e = JTIsLive.size(); i != e; ++i)
     if (!JTIsLive.test(i)) {
       JTI->RemoveJumpTable(i);
@@ -1143,22 +1121,6 @@ ReoptimizeBlock:
           !IsBetterFallthrough(PriorTBB, MBB))
         DoTransform = false;
 
-      // We don't want to do this transformation if we have control flow like:
-      //   br cond BB2
-      // BB1:
-      //   ..
-      //   jmp BBX
-      // BB2:
-      //   ..
-      //   ret
-      //
-      // In this case, we could actually be moving the return block *into* a
-      // loop!
-      if (DoTransform && !MBB->succ_empty() &&
-          (!PriorTBB->canFallThrough() || PriorTBB->empty()))
-        DoTransform = false;
-
-
       if (DoTransform) {
         // Reverse the branch so we will fall through on the previous true cond.
         SmallVector<MachineOperand, 4> NewPriorCond(PriorCond);
diff --git a/lib/CodeGen/GCStrategy.cpp b/lib/CodeGen/GCStrategy.cpp
index 1a23be0..6d7cc51 100644
--- a/lib/CodeGen/GCStrategy.cpp
+++ b/lib/CodeGen/GCStrategy.cpp
@@ -332,7 +332,7 @@ void MachineCodeAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
 MCSymbol *MachineCodeAnalysis::InsertLabel(MachineBasicBlock &MBB, 
                                            MachineBasicBlock::iterator MI,
                                            DebugLoc DL) const {
-  MCSymbol *Label = MBB.getParent()->getContext().GetOrCreateTemporarySymbol();
+  MCSymbol *Label = MBB.getParent()->getContext().CreateTempSymbol();
   BuildMI(MBB, MI, DL, TII->get(TargetOpcode::GC_LABEL)).addSym(Label);
   return Label;
 }
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp
index dbb5e19..b3e9216 100644
--- a/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -141,7 +141,7 @@ void LiveIntervals::printInstrs(raw_ostream &OS) const {
     for (MachineBasicBlock::iterator mii = mbbi->begin(),
            mie = mbbi->end(); mii != mie; ++mii) {
       if (mii->isDebugValue())
-        OS << SlotIndex::getEmptyKey() << '\t' << *mii;
+        OS << "    \t" << *mii;
       else
         OS << getInstructionIndex(mii) << '\t' << *mii;
     }
@@ -583,6 +583,16 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB,
   // Look for kills, if it reaches a def before it's killed, then it shouldn't
   // be considered a livein.
   MachineBasicBlock::iterator mi = MBB->begin();
+  MachineBasicBlock::iterator E = MBB->end();
+  // Skip over DBG_VALUE at the start of the MBB.
+  if (mi != E && mi->isDebugValue()) {
+    while (++mi != E && mi->isDebugValue())
+      ;
+    if (mi == E)
+      // MBB is empty except for DBG_VALUE's.
+      return;
+  }
+
   SlotIndex baseIndex = MIIdx;
   SlotIndex start = baseIndex;
   if (getInstructionFromIndex(baseIndex) == 0)
@@ -591,12 +601,7 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB,
   SlotIndex end = baseIndex;
   bool SeenDefUse = false;
 
-  MachineBasicBlock::iterator E = MBB->end();  
   while (mi != E) {
-    while (mi != E && mi->isDebugValue())
-      ++mi;
-    if (mi == E)
-      break;
     if (mi->killsRegister(interval.reg, tri_)) {
       DEBUG(dbgs() << " killed");
       end = baseIndex.getDefIndex();
@@ -613,10 +618,11 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB,
       break;
     }
 
-    ++mi;
-    if (mi != E && !mi->isDebugValue()) {
+    while (++mi != E && mi->isDebugValue())
+      // Skip over DBG_VALUE.
+      ;
+    if (mi != E)
       baseIndex = indexes_->getNextNonNullIndex(baseIndex);
-    }
   }
 
   // Live-in register might not be used at all.
diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp
index 37f3d22..5772b2f 100644
--- a/lib/CodeGen/MachineFunction.cpp
+++ b/lib/CodeGen/MachineFunction.cpp
@@ -88,18 +88,15 @@ MachineFunction::MachineFunction(Function *F, const TargetMachine &TM,
                                  unsigned FunctionNum, MCContext &ctx)
   : Fn(F), Target(TM), Ctx(ctx) {
   if (TM.getRegisterInfo())
-    RegInfo = new (Allocator.Allocate<MachineRegisterInfo>())
-                  MachineRegisterInfo(*TM.getRegisterInfo());
+    RegInfo = new (Allocator) MachineRegisterInfo(*TM.getRegisterInfo());
   else
     RegInfo = 0;
   MFInfo = 0;
-  FrameInfo = new (Allocator.Allocate<MachineFrameInfo>())
-                  MachineFrameInfo(*TM.getFrameInfo());
+  FrameInfo = new (Allocator) MachineFrameInfo(*TM.getFrameInfo());
   if (Fn->hasFnAttr(Attribute::StackAlignment))
     FrameInfo->setMaxAlignment(Attribute::getStackAlignmentFromAttrs(
         Fn->getAttributes().getFnAttributes()));
-  ConstantPool = new (Allocator.Allocate<MachineConstantPool>())
-                     MachineConstantPool(TM.getTargetData());
+  ConstantPool = new (Allocator) MachineConstantPool(TM.getTargetData());
   Alignment = TM.getTargetLowering()->getFunctionAlignment(F);
   FunctionNumber = FunctionNum;
   JumpTableInfo = 0;
@@ -132,7 +129,7 @@ MachineJumpTableInfo *MachineFunction::
 getOrCreateJumpTableInfo(unsigned EntryKind) {
   if (JumpTableInfo) return JumpTableInfo;
   
-  JumpTableInfo = new (Allocator.Allocate<MachineJumpTableInfo>())
+  JumpTableInfo = new (Allocator)
     MachineJumpTableInfo((MachineJumpTableInfo::JTEntryKind)EntryKind);
   return JumpTableInfo;
 }
@@ -229,14 +226,13 @@ MachineMemOperand *
 MachineFunction::getMachineMemOperand(const Value *v, unsigned f,
                                       int64_t o, uint64_t s,
                                       unsigned base_alignment) {
-  return new (Allocator.Allocate<MachineMemOperand>())
-             MachineMemOperand(v, f, o, s, base_alignment);
+  return new (Allocator) MachineMemOperand(v, f, o, s, base_alignment);
 }
 
 MachineMemOperand *
 MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO,
                                       int64_t Offset, uint64_t Size) {
-  return new (Allocator.Allocate<MachineMemOperand>())
+  return new (Allocator)
              MachineMemOperand(MMO->getValue(), MMO->getFlags(),
                                int64_t(uint64_t(MMO->getOffset()) +
                                        uint64_t(Offset)),
@@ -600,17 +596,15 @@ unsigned MachineJumpTableInfo::getEntryAlignment(const TargetData &TD) const {
   return ~0;
 }
 
-/// getJumpTableIndex - Create a new jump table entry in the jump table info
-/// or return an existing one.
+/// createJumpTableIndex - Create a new jump table entry in the jump table info.
 ///
-unsigned MachineJumpTableInfo::getJumpTableIndex(
+unsigned MachineJumpTableInfo::createJumpTableIndex(
                                const std::vector<MachineBasicBlock*> &DestBBs) {
   assert(!DestBBs.empty() && "Cannot create an empty jump table!");
   JumpTables.push_back(MachineJumpTableEntry(DestBBs));
   return JumpTables.size()-1;
 }
 
-
 /// ReplaceMBBInJumpTables - If Old is the target of any jump tables, update
 /// the jump tables to branch to New instead.
 bool MachineJumpTableInfo::ReplaceMBBInJumpTables(MachineBasicBlock *Old,
diff --git a/lib/CodeGen/RegAllocLocal.cpp b/lib/CodeGen/RegAllocLocal.cpp
index 194fc14..2c69065 100644
--- a/lib/CodeGen/RegAllocLocal.cpp
+++ b/lib/CodeGen/RegAllocLocal.cpp
@@ -739,7 +739,7 @@ void RALocal::ComputeLocalLiveness(MachineBasicBlock& MBB) {
   
     // Physical registers and those that are not live-out of the block are
     // killed/dead at their last use/def within this block.
-    if (isPhysReg || !usedOutsideBlock || BBEndsInReturn)
+    if (isPhysReg || !usedOutsideBlock || BBEndsInReturn) {
       if (MO.isUse()) {
         // Don't mark uses that are tied to defs as kills.
         if (!MI->isRegTiedToDefOperand(idx))
@@ -747,6 +747,7 @@ void RALocal::ComputeLocalLiveness(MachineBasicBlock& MBB) {
       } else {
         MO.setIsDead(true);
       }
+    }
   }
 }
 
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index b5af2c1..63ca8e6 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -851,8 +851,6 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
   case ISD::MERGE_VALUES:
   case ISD::EH_RETURN:
   case ISD::FRAME_TO_ARGS_OFFSET:
-  case ISD::FP16_TO_FP32:
-  case ISD::FP32_TO_FP16:
     // These operations lie about being legal: when they claim to be legal,
     // they should actually be expanded.
     Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 8363c3a..ed5f24c 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -2167,7 +2167,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain,
       } else
         return LdOp;
     } else {
-      unsigned NumElts = WidenWidth / LdWidth;
+      unsigned NumElts = WidenWidth / NewVTWidth;
       EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts);
       SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp);
       return DAG.getNode(ISD::BIT_CONVERT, dl, WidenVT, VecOp);
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 480c068..ed9146d 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -906,8 +906,7 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT) {
       return SDValue(N, 0);
 
   if (!N) {
-    N = NodeAllocator.Allocate<ConstantSDNode>();
-    new (N) ConstantSDNode(isT, &Val, EltVT);
+    N = new (NodeAllocator) ConstantSDNode(isT, &Val, EltVT);
     CSEMap.InsertNode(N, IP);
     AllNodes.push_back(N);
   }
@@ -950,8 +949,7 @@ SDValue SelectionDAG::getConstantFP(const ConstantFP& V, EVT VT, bool isTarget){
       return SDValue(N, 0);
 
   if (!N) {
-    N = NodeAllocator.Allocate<ConstantFPSDNode>();
-    new (N) ConstantFPSDNode(isTarget, &V, EltVT);
+    N = new (NodeAllocator) ConstantFPSDNode(isTarget, &V, EltVT);
     CSEMap.InsertNode(N, IP);
     AllNodes.push_back(N);
   }
@@ -1010,8 +1008,8 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV,
   if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
     return SDValue(E, 0);
 
-  SDNode *N = NodeAllocator.Allocate<GlobalAddressSDNode>();
-  new (N) GlobalAddressSDNode(Opc, GV, VT, Offset, TargetFlags);
+  SDNode *N = new (NodeAllocator) GlobalAddressSDNode(Opc, GV, VT,
+                                                      Offset, TargetFlags);
   CSEMap.InsertNode(N, IP);
   AllNodes.push_back(N);
   return SDValue(N, 0);
@@ -1026,8 +1024,7 @@ SDValue SelectionDAG::getFrameIndex(int FI, EVT VT, bool isTarget) {
   if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
     return SDValue(E, 0);
 
-  SDNode *N = NodeAllocator.Allocate<FrameIndexSDNode>();
-  new (N) FrameIndexSDNode(FI, VT, isTarget);
+  SDNode *N = new (NodeAllocator) FrameIndexSDNode(FI, VT, isTarget);
   CSEMap.InsertNode(N, IP);
   AllNodes.push_back(N);
   return SDValue(N, 0);
@@ -1046,8 +1043,8 @@ SDValue SelectionDAG::getJumpTable(int JTI, EVT VT, bool isTarget,
   if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
     return SDValue(E, 0);
 
-  SDNode *N = NodeAllocator.Allocate<JumpTableSDNode>();
-  new (N) JumpTableSDNode(JTI, VT, isTarget, TargetFlags);
+  SDNode *N = new (NodeAllocator) JumpTableSDNode(JTI, VT, isTarget,
+                                                  TargetFlags);
   CSEMap.InsertNode(N, IP);
   AllNodes.push_back(N);
   return SDValue(N, 0);
@@ -1072,8 +1069,8 @@ SDValue SelectionDAG::getConstantPool(Constant *C, EVT VT,
   if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
     return SDValue(E, 0);
 
-  SDNode *N = NodeAllocator.Allocate<ConstantPoolSDNode>();
-  new (N) ConstantPoolSDNode(isTarget, C, VT, Offset, Alignment, TargetFlags);
+  SDNode *N = new (NodeAllocator) ConstantPoolSDNode(isTarget, C, VT, Offset,
+                                                     Alignment, TargetFlags);
   CSEMap.InsertNode(N, IP);
   AllNodes.push_back(N);
   return SDValue(N, 0);
@@ -1099,8 +1096,8 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT,
   if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
     return SDValue(E, 0);
 
-  SDNode *N = NodeAllocator.Allocate<ConstantPoolSDNode>();
-  new (N) ConstantPoolSDNode(isTarget, C, VT, Offset, Alignment, TargetFlags);
+  SDNode *N = new (NodeAllocator) ConstantPoolSDNode(isTarget, C, VT, Offset,
+                                                     Alignment, TargetFlags);
   CSEMap.InsertNode(N, IP);
   AllNodes.push_back(N);
   return SDValue(N, 0);
@@ -1114,8 +1111,7 @@ SDValue SelectionDAG::getBasicBlock(MachineBasicBlock *MBB) {
   if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
     return SDValue(E, 0);
 
-  SDNode *N = NodeAllocator.Allocate<BasicBlockSDNode>();
-  new (N) BasicBlockSDNode(MBB);
+  SDNode *N = new (NodeAllocator) BasicBlockSDNode(MBB);
   CSEMap.InsertNode(N, IP);
   AllNodes.push_back(N);
   return SDValue(N, 0);
@@ -1130,8 +1126,7 @@ SDValue SelectionDAG::getValueType(EVT VT) {
     ExtendedValueTypeNodes[VT] : ValueTypeNodes[VT.getSimpleVT().SimpleTy];
 
   if (N) return SDValue(N, 0);
-  N = NodeAllocator.Allocate<VTSDNode>();
-  new (N) VTSDNode(VT);
+  N = new (NodeAllocator) VTSDNode(VT);
   AllNodes.push_back(N);
   return SDValue(N, 0);
 }
@@ -1139,8 +1134,7 @@ SDValue SelectionDAG::getValueType(EVT VT) {
 SDValue SelectionDAG::getExternalSymbol(const char *Sym, EVT VT) {
   SDNode *&N = ExternalSymbols[Sym];
   if (N) return SDValue(N, 0);
-  N = NodeAllocator.Allocate<ExternalSymbolSDNode>();
-  new (N) ExternalSymbolSDNode(false, Sym, 0, VT);
+  N = new (NodeAllocator) ExternalSymbolSDNode(false, Sym, 0, VT);
   AllNodes.push_back(N);
   return SDValue(N, 0);
 }
@@ -1151,8 +1145,7 @@ SDValue SelectionDAG::getTargetExternalSymbol(const char *Sym, EVT VT,
     TargetExternalSymbols[std::pair<std::string,unsigned char>(Sym,
                                                                TargetFlags)];
   if (N) return SDValue(N, 0);
-  N = NodeAllocator.Allocate<ExternalSymbolSDNode>();
-  new (N) ExternalSymbolSDNode(true, Sym, TargetFlags, VT);
+  N = new (NodeAllocator) ExternalSymbolSDNode(true, Sym, TargetFlags, VT);
   AllNodes.push_back(N);
   return SDValue(N, 0);
 }
@@ -1162,8 +1155,7 @@ SDValue SelectionDAG::getCondCode(ISD::CondCode Cond) {
     CondCodeNodes.resize(Cond+1);
 
   if (CondCodeNodes[Cond] == 0) {
-    CondCodeSDNode *N = NodeAllocator.Allocate<CondCodeSDNode>();
-    new (N) CondCodeSDNode(Cond);
+    CondCodeSDNode *N = new (NodeAllocator) CondCodeSDNode(Cond);
     CondCodeNodes[Cond] = N;
     AllNodes.push_back(N);
   }
@@ -1268,8 +1260,8 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, DebugLoc dl, SDValue N1,
   int *MaskAlloc = OperandAllocator.Allocate<int>(NElts);
   memcpy(MaskAlloc, &MaskVec[0], NElts * sizeof(int));
 
-  ShuffleVectorSDNode *N = NodeAllocator.Allocate<ShuffleVectorSDNode>();
-  new (N) ShuffleVectorSDNode(VT, dl, N1, N2, MaskAlloc);
+  ShuffleVectorSDNode *N =
+    new (NodeAllocator) ShuffleVectorSDNode(VT, dl, N1, N2, MaskAlloc);
   CSEMap.InsertNode(N, IP);
   AllNodes.push_back(N);
   return SDValue(N, 0);
@@ -1292,8 +1284,8 @@ SDValue SelectionDAG::getConvertRndSat(EVT VT, DebugLoc dl,
   if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
     return SDValue(E, 0);
 
-  CvtRndSatSDNode *N = NodeAllocator.Allocate<CvtRndSatSDNode>();
-  new (N) CvtRndSatSDNode(VT, dl, Ops, 5, Code);
+  CvtRndSatSDNode *N = new (NodeAllocator) CvtRndSatSDNode(VT, dl, Ops, 5,
+                                                           Code);
   CSEMap.InsertNode(N, IP);
   AllNodes.push_back(N);
   return SDValue(N, 0);
@@ -1307,8 +1299,7 @@ SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) {
   if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
     return SDValue(E, 0);
 
-  SDNode *N = NodeAllocator.Allocate<RegisterSDNode>();
-  new (N) RegisterSDNode(RegNo, VT);
+  SDNode *N = new (NodeAllocator) RegisterSDNode(RegNo, VT);
   CSEMap.InsertNode(N, IP);
   AllNodes.push_back(N);
   return SDValue(N, 0);
@@ -1323,8 +1314,7 @@ SDValue SelectionDAG::getEHLabel(DebugLoc dl, SDValue Root, MCSymbol *Label) {
   if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
     return SDValue(E, 0);
   
-  SDNode *N = NodeAllocator.Allocate<EHLabelSDNode>();
-  new (N) EHLabelSDNode(dl, Root, Label);
+  SDNode *N = new (NodeAllocator) EHLabelSDNode(dl, Root, Label);
   CSEMap.InsertNode(N, IP);
   AllNodes.push_back(N);
   return SDValue(N, 0);
@@ -1344,8 +1334,7 @@ SDValue SelectionDAG::getBlockAddress(BlockAddress *BA, EVT VT,
   if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
     return SDValue(E, 0);
 
-  SDNode *N = NodeAllocator.Allocate<BlockAddressSDNode>();
-  new (N) BlockAddressSDNode(Opc, VT, BA, TargetFlags);
+  SDNode *N = new (NodeAllocator) BlockAddressSDNode(Opc, VT, BA, TargetFlags);
   CSEMap.InsertNode(N, IP);
   AllNodes.push_back(N);
   return SDValue(N, 0);
@@ -1363,8 +1352,7 @@ SDValue SelectionDAG::getSrcValue(const Value *V) {
   if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
     return SDValue(E, 0);
 
-  SDNode *N = NodeAllocator.Allocate<SrcValueSDNode>();
-  new (N) SrcValueSDNode(V);
+  SDNode *N = new (NodeAllocator) SrcValueSDNode(V);
   CSEMap.InsertNode(N, IP);
   AllNodes.push_back(N);
   return SDValue(N, 0);
@@ -2313,8 +2301,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT) {
   if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
     return SDValue(E, 0);
 
-  SDNode *N = NodeAllocator.Allocate<SDNode>();
-  new (N) SDNode(Opcode, DL, getVTList(VT));
+  SDNode *N = new (NodeAllocator) SDNode(Opcode, DL, getVTList(VT));
   CSEMap.InsertNode(N, IP);
 
   AllNodes.push_back(N);
@@ -2542,12 +2529,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
     if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
       return SDValue(E, 0);
 
-    N = NodeAllocator.Allocate<UnarySDNode>();
-    new (N) UnarySDNode(Opcode, DL, VTs, Operand);
+    N = new (NodeAllocator) UnarySDNode(Opcode, DL, VTs, Operand);
     CSEMap.InsertNode(N, IP);
   } else {
-    N = NodeAllocator.Allocate<UnarySDNode>();
-    new (N) UnarySDNode(Opcode, DL, VTs, Operand);
+    N = new (NodeAllocator) UnarySDNode(Opcode, DL, VTs, Operand);
   }
 
   AllNodes.push_back(N);
@@ -2975,12 +2960,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
     if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
       return SDValue(E, 0);
 
-    N = NodeAllocator.Allocate<BinarySDNode>();
-    new (N) BinarySDNode(Opcode, DL, VTs, N1, N2);
+    N = new (NodeAllocator) BinarySDNode(Opcode, DL, VTs, N1, N2);
     CSEMap.InsertNode(N, IP);
   } else {
-    N = NodeAllocator.Allocate<BinarySDNode>();
-    new (N) BinarySDNode(Opcode, DL, VTs, N1, N2);
+    N = new (NodeAllocator) BinarySDNode(Opcode, DL, VTs, N1, N2);
   }
 
   AllNodes.push_back(N);
@@ -3053,12 +3036,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
     if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
       return SDValue(E, 0);
 
-    N = NodeAllocator.Allocate<TernarySDNode>();
-    new (N) TernarySDNode(Opcode, DL, VTs, N1, N2, N3);
+    N = new (NodeAllocator) TernarySDNode(Opcode, DL, VTs, N1, N2, N3);
     CSEMap.InsertNode(N, IP);
   } else {
-    N = NodeAllocator.Allocate<TernarySDNode>();
-    new (N) TernarySDNode(Opcode, DL, VTs, N1, N2, N3);
+    N = new (NodeAllocator) TernarySDNode(Opcode, DL, VTs, N1, N2, N3);
   }
 
   AllNodes.push_back(N);
@@ -3659,8 +3640,8 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
     cast<AtomicSDNode>(E)->refineAlignment(MMO);
     return SDValue(E, 0);
   }
-  SDNode* N = NodeAllocator.Allocate<AtomicSDNode>();
-  new (N) AtomicSDNode(Opcode, dl, VTs, MemVT, Chain, Ptr, Cmp, Swp, MMO);
+  SDNode *N = new (NodeAllocator) AtomicSDNode(Opcode, dl, VTs, MemVT, Chain,
+                                               Ptr, Cmp, Swp, MMO);
   CSEMap.InsertNode(N, IP);
   AllNodes.push_back(N);
   return SDValue(N, 0);
@@ -3722,8 +3703,8 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
     cast<AtomicSDNode>(E)->refineAlignment(MMO);
     return SDValue(E, 0);
   }
-  SDNode* N = NodeAllocator.Allocate<AtomicSDNode>();
-  new (N) AtomicSDNode(Opcode, dl, VTs, MemVT, Chain, Ptr, Val, MMO);
+  SDNode *N = new (NodeAllocator) AtomicSDNode(Opcode, dl, VTs, MemVT, Chain,
+                                               Ptr, Val, MMO);
   CSEMap.InsertNode(N, IP);
   AllNodes.push_back(N);
   return SDValue(N, 0);
@@ -3801,12 +3782,12 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList,
       return SDValue(E, 0);
     }
 
-    N = NodeAllocator.Allocate<MemIntrinsicSDNode>();
-    new (N) MemIntrinsicSDNode(Opcode, dl, VTList, Ops, NumOps, MemVT, MMO);
+    N = new (NodeAllocator) MemIntrinsicSDNode(Opcode, dl, VTList, Ops, NumOps,
+                                               MemVT, MMO);
     CSEMap.InsertNode(N, IP);
   } else {
-    N = NodeAllocator.Allocate<MemIntrinsicSDNode>();
-    new (N) MemIntrinsicSDNode(Opcode, dl, VTList, Ops, NumOps, MemVT, MMO);
+    N = new (NodeAllocator) MemIntrinsicSDNode(Opcode, dl, VTList, Ops, NumOps,
+                                               MemVT, MMO);
   }
   AllNodes.push_back(N);
   return SDValue(N, 0);
@@ -3879,8 +3860,8 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, DebugLoc dl,
     cast<LoadSDNode>(E)->refineAlignment(MMO);
     return SDValue(E, 0);
   }
-  SDNode *N = NodeAllocator.Allocate<LoadSDNode>();
-  new (N) LoadSDNode(Ops, dl, VTs, AM, ExtType, MemVT, MMO);
+  SDNode *N = new (NodeAllocator) LoadSDNode(Ops, dl, VTs, AM, ExtType,
+                                             MemVT, MMO);
   CSEMap.InsertNode(N, IP);
   AllNodes.push_back(N);
   return SDValue(N, 0);
@@ -3961,8 +3942,8 @@ SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val,
     cast<StoreSDNode>(E)->refineAlignment(MMO);
     return SDValue(E, 0);
   }
-  SDNode *N = NodeAllocator.Allocate<StoreSDNode>();
-  new (N) StoreSDNode(Ops, dl, VTs, ISD::UNINDEXED, false, VT, MMO);
+  SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl, VTs, ISD::UNINDEXED,
+                                              false, VT, MMO);
   CSEMap.InsertNode(N, IP);
   AllNodes.push_back(N);
   return SDValue(N, 0);
@@ -4025,8 +4006,8 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val,
     cast<StoreSDNode>(E)->refineAlignment(MMO);
     return SDValue(E, 0);
   }
-  SDNode *N = NodeAllocator.Allocate<StoreSDNode>();
-  new (N) StoreSDNode(Ops, dl, VTs, ISD::UNINDEXED, true, SVT, MMO);
+  SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl, VTs, ISD::UNINDEXED,
+                                              true, SVT, MMO);
   CSEMap.InsertNode(N, IP);
   AllNodes.push_back(N);
   return SDValue(N, 0);
@@ -4048,10 +4029,10 @@ SelectionDAG::getIndexedStore(SDValue OrigStore, DebugLoc dl, SDValue Base,
   if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
     return SDValue(E, 0);
 
-  SDNode *N = NodeAllocator.Allocate<StoreSDNode>();
-  new (N) StoreSDNode(Ops, dl, VTs, AM,
-                      ST->isTruncatingStore(), ST->getMemoryVT(),
-                      ST->getMemOperand());
+  SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl, VTs, AM,
+                                              ST->isTruncatingStore(),
+                                              ST->getMemoryVT(),
+                                              ST->getMemOperand());
   CSEMap.InsertNode(N, IP);
   AllNodes.push_back(N);
   return SDValue(N, 0);
@@ -4122,12 +4103,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
     if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
       return SDValue(E, 0);
 
-    N = NodeAllocator.Allocate<SDNode>();
-    new (N) SDNode(Opcode, DL, VTs, Ops, NumOps);
+    N = new (NodeAllocator) SDNode(Opcode, DL, VTs, Ops, NumOps);
     CSEMap.InsertNode(N, IP);
   } else {
-    N = NodeAllocator.Allocate<SDNode>();
-    new (N) SDNode(Opcode, DL, VTs, Ops, NumOps);
+    N = new (NodeAllocator) SDNode(Opcode, DL, VTs, Ops, NumOps);
   }
 
   AllNodes.push_back(N);
@@ -4190,32 +4169,26 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList,
       return SDValue(E, 0);
 
     if (NumOps == 1) {
-      N = NodeAllocator.Allocate<UnarySDNode>();
-      new (N) UnarySDNode(Opcode, DL, VTList, Ops[0]);
+      N = new (NodeAllocator) UnarySDNode(Opcode, DL, VTList, Ops[0]);
     } else if (NumOps == 2) {
-      N = NodeAllocator.Allocate<BinarySDNode>();
-      new (N) BinarySDNode(Opcode, DL, VTList, Ops[0], Ops[1]);
+      N = new (NodeAllocator) BinarySDNode(Opcode, DL, VTList, Ops[0], Ops[1]);
     } else if (NumOps == 3) {
-      N = NodeAllocator.Allocate<TernarySDNode>();
-      new (N) TernarySDNode(Opcode, DL, VTList, Ops[0], Ops[1], Ops[2]);
+      N = new (NodeAllocator) TernarySDNode(Opcode, DL, VTList, Ops[0], Ops[1],
+                                            Ops[2]);
     } else {
-      N = NodeAllocator.Allocate<SDNode>();
-      new (N) SDNode(Opcode, DL, VTList, Ops, NumOps);
+      N = new (NodeAllocator) SDNode(Opcode, DL, VTList, Ops, NumOps);
     }
     CSEMap.InsertNode(N, IP);
   } else {
     if (NumOps == 1) {
-      N = NodeAllocator.Allocate<UnarySDNode>();
-      new (N) UnarySDNode(Opcode, DL, VTList, Ops[0]);
+      N = new (NodeAllocator) UnarySDNode(Opcode, DL, VTList, Ops[0]);
     } else if (NumOps == 2) {
-      N = NodeAllocator.Allocate<BinarySDNode>();
-      new (N) BinarySDNode(Opcode, DL, VTList, Ops[0], Ops[1]);
+      N = new (NodeAllocator) BinarySDNode(Opcode, DL, VTList, Ops[0], Ops[1]);
     } else if (NumOps == 3) {
-      N = NodeAllocator.Allocate<TernarySDNode>();
-      new (N) TernarySDNode(Opcode, DL, VTList, Ops[0], Ops[1], Ops[2]);
+      N = new (NodeAllocator) TernarySDNode(Opcode, DL, VTList, Ops[0], Ops[1],
+                                            Ops[2]);
     } else {
-      N = NodeAllocator.Allocate<SDNode>();
-      new (N) SDNode(Opcode, DL, VTList, Ops, NumOps);
+      N = new (NodeAllocator) SDNode(Opcode, DL, VTList, Ops, NumOps);
     }
   }
   AllNodes.push_back(N);
@@ -4640,7 +4613,7 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
         // remainder of the current SelectionDAG iteration, so we can allocate
         // the operands directly out of a pool with no recycling metadata.
         MN->InitOperands(OperandAllocator.Allocate<SDUse>(NumOps),
-                        Ops, NumOps);
+                         Ops, NumOps);
       else
         MN->InitOperands(MN->LocalOperands, Ops, NumOps);
       MN->OperandsNeedDelete = false;
@@ -4814,8 +4787,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc DL, SDVTList VTs,
   }
 
   // Allocate a new MachineSDNode.
-  N = NodeAllocator.Allocate<MachineSDNode>();
-  new (N) MachineSDNode(~Opcode, DL, VTs);
+  N = new (NodeAllocator) MachineSDNode(~Opcode, DL, VTs);
 
   // Initialize the operands list.
   if (NumOps > array_lengthof(N->LocalOperands))
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 3d9a4d5..12096b9 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -1675,11 +1675,10 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR,
     }
   }
 
-  // Create a jump table index for this jump table, or return an existing
-  // one.
+  // Create a jump table index for this jump table.
   unsigned JTEncoding = TLI.getJumpTableEncoding();
   unsigned JTI = CurMF->getOrCreateJumpTableInfo(JTEncoding)
-                       ->getJumpTableIndex(DestBBs);
+                       ->createJumpTableIndex(DestBBs);
 
   // Set the jump table information so that we can codegen it as a second
   // MachineBasicBlock
@@ -2592,6 +2591,11 @@ void SelectionDAGBuilder::visitGetElementPtr(User &I) {
       }
 
       Ty = StTy->getElementType(Field);
+    } else if (const UnionType *UnTy = dyn_cast<UnionType>(Ty)) {
+      unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
+      
+      // Offset canonically 0 for unions, but type changes
+      Ty = UnTy->getElementType(Field);
     } else {
       Ty = cast<SequentialType>(Ty)->getElementType();
 
@@ -4277,6 +4281,9 @@ isInTailCallPosition(CallSite CS, Attributes CalleeRetAttr,
          --BBI) {
       if (&*BBI == I)
         break;
+      // Debug info intrinsics do not get in the way of tail call optimization.
+      if (isa<DbgInfoIntrinsic>(BBI))
+        continue;
       if (BBI->mayHaveSideEffects() || BBI->mayReadFromMemory() ||
           !BBI->isSafeToSpeculativelyExecute())
         return false;
diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp
index 5c62118..97e858f 100644
--- a/lib/CodeGen/SimpleRegisterCoalescing.cpp
+++ b/lib/CodeGen/SimpleRegisterCoalescing.cpp
@@ -31,6 +31,7 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/OwningPtr.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/STLExtras.h"
@@ -1639,11 +1640,11 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
   // Save a copy of the virtual register live interval. We'll manually
   // merge this into the "real" physical register live interval this is
   // coalesced with.
-  LiveInterval *SavedLI = 0;
+  OwningPtr<LiveInterval> SavedLI;
   if (RealDstReg)
-    SavedLI = li_->dupInterval(&SrcInt);
+    SavedLI.reset(li_->dupInterval(&SrcInt));
   else if (RealSrcReg)
-    SavedLI = li_->dupInterval(&DstInt);
+    SavedLI.reset(li_->dupInterval(&DstInt));
 
   // Check if it is necessary to propagate "isDead" property.
   if (!isExtSubReg && !isInsSubReg && !isSubRegToReg) {
@@ -1853,7 +1854,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
   // Manually deleted the live interval copy.
   if (SavedLI) {
     SavedLI->clear();
-    delete SavedLI;
+    SavedLI.reset();
   }
 
   // If resulting interval has a preference that no longer fits because of subreg
diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp
index fa3785d..aa6e2b4 100644
--- a/lib/CodeGen/TailDuplication.cpp
+++ b/lib/CodeGen/TailDuplication.cpp
@@ -495,7 +495,7 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF,
     if (InstrCount == MaxDuplicateCount) return false;
     // Remember if we saw a call.
     if (I->getDesc().isCall()) HasCall = true;
-    if (!I->isPHI())
+    if (!I->isPHI() && !I->isDebugValue())
       InstrCount += 1;
   }
   // Heuristically, don't tail-duplicate calls if it would expand code size,
diff --git a/lib/MC/CMakeLists.txt b/lib/MC/CMakeLists.txt
index 4cf71dc..dba0e14 100644
--- a/lib/MC/CMakeLists.txt
+++ b/lib/MC/CMakeLists.txt
@@ -12,11 +12,13 @@ add_llvm_library(LLVMMC
   MCInstPrinter.cpp
   MCMachOStreamer.cpp
   MCNullStreamer.cpp
+  MCObjectWriter.cpp
   MCSection.cpp
   MCSectionELF.cpp
   MCSectionMachO.cpp
   MCStreamer.cpp
   MCSymbol.cpp
   MCValue.cpp
+  MachObjectWriter.cpp
   TargetAsmBackend.cpp
   )
diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp
index 7f39471..2025463 100644
--- a/lib/MC/MCAsmStreamer.cpp
+++ b/lib/MC/MCAsmStreamer.cpp
@@ -16,6 +16,7 @@
 #include "llvm/MC/MCInstPrinter.h"
 #include "llvm/MC/MCSectionMachO.h"
 #include "llvm/MC/MCSymbol.h"
+#include "llvm/ADT/OwningPtr.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -29,7 +30,7 @@ namespace {
 class MCAsmStreamer : public MCStreamer {
   formatted_raw_ostream &OS;
   const MCAsmInfo &MAI;
-  MCInstPrinter *InstPrinter;
+  OwningPtr<MCInstPrinter> InstPrinter;
   MCCodeEmitter *Emitter;
   
   SmallString<128> CommentToEmit;
diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp
index 4cf8b7e..beecf7e 100644
--- a/lib/MC/MCAssembler.cpp
+++ b/lib/MC/MCAssembler.cpp
@@ -10,18 +10,16 @@
 #define DEBUG_TYPE "assembler"
 #include "llvm/MC/MCAssembler.h"
 #include "llvm/MC/MCAsmLayout.h"
+#include "llvm/MC/MCCodeEmitter.h"
 #include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCObjectWriter.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/MC/MCValue.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/OwningPtr.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MachO.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Target/TargetRegistry.h"
@@ -33,8 +31,6 @@
 #include <vector>
 using namespace llvm;
 
-class MachObjectWriter;
-
 STATISTIC(EmittedFragments, "Number of emitted assembler fragments");
 
 // FIXME FIXME FIXME: There are number of places in this file where we convert
@@ -42,917 +38,6 @@ STATISTIC(EmittedFragments, "Number of emitted assembler fragments");
 // object file, which may truncate it. We should detect that truncation where
 // invalid and report errors back.
 
-static void WriteFileData(raw_ostream &OS, const MCSectionData &SD,
-                          MachObjectWriter &MOW);
-
-static uint64_t WriteNopData(uint64_t Count, MachObjectWriter &MOW);
-
-/// isVirtualSection - Check if this is a section which does not actually exist
-/// in the object file.
-static bool isVirtualSection(const MCSection &Section) {
-  // FIXME: Lame.
-  const MCSectionMachO &SMO = static_cast<const MCSectionMachO&>(Section);
-  return (SMO.getType() == MCSectionMachO::S_ZEROFILL);
-}
-
-static unsigned getFixupKindLog2Size(unsigned Kind) {
-  switch (Kind) {
-  default: llvm_unreachable("invalid fixup kind!");
-  case X86::reloc_pcrel_1byte:
-  case FK_Data_1: return 0;
-  case FK_Data_2: return 1;
-  case X86::reloc_pcrel_4byte:
-  case X86::reloc_riprel_4byte:
-  case FK_Data_4: return 2;
-  case FK_Data_8: return 3;
-  }
-}
-
-static bool isFixupKindPCRel(unsigned Kind) {
-  switch (Kind) {
-  default:
-    return false;
-  case X86::reloc_pcrel_1byte:
-  case X86::reloc_pcrel_4byte:
-  case X86::reloc_riprel_4byte:
-    return true;
-  }
-}
-
-class MachObjectWriter {
-  // See <mach-o/loader.h>.
-  enum {
-    Header_Magic32 = 0xFEEDFACE,
-    Header_Magic64 = 0xFEEDFACF
-  };
-
-  enum {
-    Header32Size = 28,
-    Header64Size = 32,
-    SegmentLoadCommand32Size = 56,
-    SegmentLoadCommand64Size = 72,
-    Section32Size = 68,
-    Section64Size = 80,
-    SymtabLoadCommandSize = 24,
-    DysymtabLoadCommandSize = 80,
-    Nlist32Size = 12,
-    Nlist64Size = 16,
-    RelocationInfoSize = 8
-  };
-
-  enum HeaderFileType {
-    HFT_Object = 0x1
-  };
-
-  enum HeaderFlags {
-    HF_SubsectionsViaSymbols = 0x2000
-  };
-
-  enum LoadCommandType {
-    LCT_Segment = 0x1,
-    LCT_Symtab = 0x2,
-    LCT_Dysymtab = 0xb,
-    LCT_Segment64 = 0x19
-  };
-
-  // See <mach-o/nlist.h>.
-  enum SymbolTypeType {
-    STT_Undefined = 0x00,
-    STT_Absolute  = 0x02,
-    STT_Section   = 0x0e
-  };
-
-  enum SymbolTypeFlags {
-    // If any of these bits are set, then the entry is a stab entry number (see
-    // <mach-o/stab.h>. Otherwise the other masks apply.
-    STF_StabsEntryMask = 0xe0,
-
-    STF_TypeMask       = 0x0e,
-    STF_External       = 0x01,
-    STF_PrivateExtern  = 0x10
-  };
-
-  /// IndirectSymbolFlags - Flags for encoding special values in the indirect
-  /// symbol entry.
-  enum IndirectSymbolFlags {
-    ISF_Local    = 0x80000000,
-    ISF_Absolute = 0x40000000
-  };
-
-  /// RelocationFlags - Special flags for addresses.
-  enum RelocationFlags {
-    RF_Scattered = 0x80000000
-  };
-
-  enum RelocationInfoType {
-    RIT_Vanilla             = 0,
-    RIT_Pair                = 1,
-    RIT_Difference          = 2,
-    RIT_PreboundLazyPointer = 3,
-    RIT_LocalDifference     = 4
-  };
-
-  /// MachSymbolData - Helper struct for containing some precomputed information
-  /// on symbols.
-  struct MachSymbolData {
-    MCSymbolData *SymbolData;
-    uint64_t StringIndex;
-    uint8_t SectionIndex;
-
-    // Support lexicographic sorting.
-    bool operator<(const MachSymbolData &RHS) const {
-      const std::string &Name = SymbolData->getSymbol().getName();
-      return Name < RHS.SymbolData->getSymbol().getName();
-    }
-  };
-
-  raw_ostream &OS;
-  unsigned Is64Bit : 1;
-  unsigned IsLSB : 1;
-
-public:
-  MachObjectWriter(raw_ostream &_OS, bool _Is64Bit, bool _IsLSB = true)
-    : OS(_OS), Is64Bit(_Is64Bit), IsLSB(_IsLSB) {
-  }
-
-  /// @name Helper Methods
-  /// @{
-
-  void Write8(uint8_t Value) {
-    OS << char(Value);
-  }
-
-  void Write16(uint16_t Value) {
-    if (IsLSB) {
-      Write8(uint8_t(Value >> 0));
-      Write8(uint8_t(Value >> 8));
-    } else {
-      Write8(uint8_t(Value >> 8));
-      Write8(uint8_t(Value >> 0));
-    }
-  }
-
-  void Write32(uint32_t Value) {
-    if (IsLSB) {
-      Write16(uint16_t(Value >> 0));
-      Write16(uint16_t(Value >> 16));
-    } else {
-      Write16(uint16_t(Value >> 16));
-      Write16(uint16_t(Value >> 0));
-    }
-  }
-
-  void Write64(uint64_t Value) {
-    if (IsLSB) {
-      Write32(uint32_t(Value >> 0));
-      Write32(uint32_t(Value >> 32));
-    } else {
-      Write32(uint32_t(Value >> 32));
-      Write32(uint32_t(Value >> 0));
-    }
-  }
-
-  void WriteZeros(unsigned N) {
-    const char Zeros[16] = { 0 };
-
-    for (unsigned i = 0, e = N / 16; i != e; ++i)
-      OS << StringRef(Zeros, 16);
-
-    OS << StringRef(Zeros, N % 16);
-  }
-
-  void WriteString(StringRef Str, unsigned ZeroFillSize = 0) {
-    OS << Str;
-    if (ZeroFillSize)
-      WriteZeros(ZeroFillSize - Str.size());
-  }
-
-  /// @}
-
-  void WriteHeader(unsigned NumLoadCommands, unsigned LoadCommandsSize,
-                   bool SubsectionsViaSymbols) {
-    uint32_t Flags = 0;
-
-    if (SubsectionsViaSymbols)
-      Flags |= HF_SubsectionsViaSymbols;
-
-    // struct mach_header (28 bytes) or
-    // struct mach_header_64 (32 bytes)
-
-    uint64_t Start = OS.tell();
-    (void) Start;
-
-    Write32(Is64Bit ? Header_Magic64 : Header_Magic32);
-
-    // FIXME: Support cputype.
-    Write32(Is64Bit ? MachO::CPUTypeX86_64 : MachO::CPUTypeI386);
-    // FIXME: Support cpusubtype.
-    Write32(MachO::CPUSubType_I386_ALL);
-    Write32(HFT_Object);
-    Write32(NumLoadCommands);    // Object files have a single load command, the
-                                 // segment.
-    Write32(LoadCommandsSize);
-    Write32(Flags);
-    if (Is64Bit)
-      Write32(0); // reserved
-
-    assert(OS.tell() - Start == Is64Bit ? Header64Size : Header32Size);
-  }
-
-  /// WriteSegmentLoadCommand - Write a segment load command.
-  ///
-  /// \arg NumSections - The number of sections in this segment.
-  /// \arg SectionDataSize - The total size of the sections.
-  void WriteSegmentLoadCommand(unsigned NumSections,
-                               uint64_t VMSize,
-                               uint64_t SectionDataStartOffset,
-                               uint64_t SectionDataSize) {
-    // struct segment_command (56 bytes) or
-    // struct segment_command_64 (72 bytes)
-
-    uint64_t Start = OS.tell();
-    (void) Start;
-
-    unsigned SegmentLoadCommandSize = Is64Bit ? SegmentLoadCommand64Size :
-      SegmentLoadCommand32Size;
-    Write32(Is64Bit ? LCT_Segment64 : LCT_Segment);
-    Write32(SegmentLoadCommandSize +
-            NumSections * (Is64Bit ? Section64Size : Section32Size));
-
-    WriteString("", 16);
-    if (Is64Bit) {
-      Write64(0); // vmaddr
-      Write64(VMSize); // vmsize
-      Write64(SectionDataStartOffset); // file offset
-      Write64(SectionDataSize); // file size
-    } else {
-      Write32(0); // vmaddr
-      Write32(VMSize); // vmsize
-      Write32(SectionDataStartOffset); // file offset
-      Write32(SectionDataSize); // file size
-    }
-    Write32(0x7); // maxprot
-    Write32(0x7); // initprot
-    Write32(NumSections);
-    Write32(0); // flags
-
-    assert(OS.tell() - Start == SegmentLoadCommandSize);
-  }
-
-  void WriteSection(const MCSectionData &SD, uint64_t FileOffset,
-                    uint64_t RelocationsStart, unsigned NumRelocations) {
-    // The offset is unused for virtual sections.
-    if (isVirtualSection(SD.getSection())) {
-      assert(SD.getFileSize() == 0 && "Invalid file size!");
-      FileOffset = 0;
-    }
-
-    // struct section (68 bytes) or
-    // struct section_64 (80 bytes)
-
-    uint64_t Start = OS.tell();
-    (void) Start;
-
-    // FIXME: cast<> support!
-    const MCSectionMachO &Section =
-      static_cast<const MCSectionMachO&>(SD.getSection());
-    WriteString(Section.getSectionName(), 16);
-    WriteString(Section.getSegmentName(), 16);
-    if (Is64Bit) {
-      Write64(SD.getAddress()); // address
-      Write64(SD.getSize()); // size
-    } else {
-      Write32(SD.getAddress()); // address
-      Write32(SD.getSize()); // size
-    }
-    Write32(FileOffset);
-
-    unsigned Flags = Section.getTypeAndAttributes();
-    if (SD.hasInstructions())
-      Flags |= MCSectionMachO::S_ATTR_SOME_INSTRUCTIONS;
-
-    assert(isPowerOf2_32(SD.getAlignment()) && "Invalid alignment!");
-    Write32(Log2_32(SD.getAlignment()));
-    Write32(NumRelocations ? RelocationsStart : 0);
-    Write32(NumRelocations);
-    Write32(Flags);
-    Write32(0); // reserved1
-    Write32(Section.getStubSize()); // reserved2
-    if (Is64Bit)
-      Write32(0); // reserved3
-
-    assert(OS.tell() - Start == Is64Bit ? Section64Size : Section32Size);
-  }
-
-  void WriteSymtabLoadCommand(uint32_t SymbolOffset, uint32_t NumSymbols,
-                              uint32_t StringTableOffset,
-                              uint32_t StringTableSize) {
-    // struct symtab_command (24 bytes)
-
-    uint64_t Start = OS.tell();
-    (void) Start;
-
-    Write32(LCT_Symtab);
-    Write32(SymtabLoadCommandSize);
-    Write32(SymbolOffset);
-    Write32(NumSymbols);
-    Write32(StringTableOffset);
-    Write32(StringTableSize);
-
-    assert(OS.tell() - Start == SymtabLoadCommandSize);
-  }
-
-  void WriteDysymtabLoadCommand(uint32_t FirstLocalSymbol,
-                                uint32_t NumLocalSymbols,
-                                uint32_t FirstExternalSymbol,
-                                uint32_t NumExternalSymbols,
-                                uint32_t FirstUndefinedSymbol,
-                                uint32_t NumUndefinedSymbols,
-                                uint32_t IndirectSymbolOffset,
-                                uint32_t NumIndirectSymbols) {
-    // struct dysymtab_command (80 bytes)
-
-    uint64_t Start = OS.tell();
-    (void) Start;
-
-    Write32(LCT_Dysymtab);
-    Write32(DysymtabLoadCommandSize);
-    Write32(FirstLocalSymbol);
-    Write32(NumLocalSymbols);
-    Write32(FirstExternalSymbol);
-    Write32(NumExternalSymbols);
-    Write32(FirstUndefinedSymbol);
-    Write32(NumUndefinedSymbols);
-    Write32(0); // tocoff
-    Write32(0); // ntoc
-    Write32(0); // modtaboff
-    Write32(0); // nmodtab
-    Write32(0); // extrefsymoff
-    Write32(0); // nextrefsyms
-    Write32(IndirectSymbolOffset);
-    Write32(NumIndirectSymbols);
-    Write32(0); // extreloff
-    Write32(0); // nextrel
-    Write32(0); // locreloff
-    Write32(0); // nlocrel
-
-    assert(OS.tell() - Start == DysymtabLoadCommandSize);
-  }
-
-  void WriteNlist(MachSymbolData &MSD) {
-    MCSymbolData &Data = *MSD.SymbolData;
-    const MCSymbol &Symbol = Data.getSymbol();
-    uint8_t Type = 0;
-    uint16_t Flags = Data.getFlags();
-    uint32_t Address = 0;
-
-    // Set the N_TYPE bits. See <mach-o/nlist.h>.
-    //
-    // FIXME: Are the prebound or indirect fields possible here?
-    if (Symbol.isUndefined())
-      Type = STT_Undefined;
-    else if (Symbol.isAbsolute())
-      Type = STT_Absolute;
-    else
-      Type = STT_Section;
-
-    // FIXME: Set STAB bits.
-
-    if (Data.isPrivateExtern())
-      Type |= STF_PrivateExtern;
-
-    // Set external bit.
-    if (Data.isExternal() || Symbol.isUndefined())
-      Type |= STF_External;
-
-    // Compute the symbol address.
-    if (Symbol.isDefined()) {
-      if (Symbol.isAbsolute()) {
-        llvm_unreachable("FIXME: Not yet implemented!");
-      } else {
-        Address = Data.getAddress();
-      }
-    } else if (Data.isCommon()) {
-      // Common symbols are encoded with the size in the address
-      // field, and their alignment in the flags.
-      Address = Data.getCommonSize();
-
-      // Common alignment is packed into the 'desc' bits.
-      if (unsigned Align = Data.getCommonAlignment()) {
-        unsigned Log2Size = Log2_32(Align);
-        assert((1U << Log2Size) == Align && "Invalid 'common' alignment!");
-        if (Log2Size > 15)
-          llvm_report_error("invalid 'common' alignment '" +
-                            Twine(Align) + "'");
-        // FIXME: Keep this mask with the SymbolFlags enumeration.
-        Flags = (Flags & 0xF0FF) | (Log2Size << 8);
-      }
-    }
-
-    // struct nlist (12 bytes)
-
-    Write32(MSD.StringIndex);
-    Write8(Type);
-    Write8(MSD.SectionIndex);
-
-    // The Mach-O streamer uses the lowest 16-bits of the flags for the 'desc'
-    // value.
-    Write16(Flags);
-    if (Is64Bit)
-      Write64(Address);
-    else
-      Write32(Address);
-  }
-
-  struct MachRelocationEntry {
-    uint32_t Word0;
-    uint32_t Word1;
-  };
-  void ComputeScatteredRelocationInfo(MCAssembler &Asm, MCFragment &Fragment,
-                                      MCAsmFixup &Fixup,
-                                      const MCValue &Target,
-                                     std::vector<MachRelocationEntry> &Relocs) {
-    uint32_t Address = Fragment.getOffset() + Fixup.Offset;
-    unsigned IsPCRel = isFixupKindPCRel(Fixup.Kind);
-    unsigned Log2Size = getFixupKindLog2Size(Fixup.Kind);
-    unsigned Type = RIT_Vanilla;
-
-    // See <reloc.h>.
-    const MCSymbol *A = Target.getSymA();
-    MCSymbolData *A_SD = &Asm.getSymbolData(*A);
-
-    if (!A_SD->getFragment())
-      llvm_report_error("symbol '" + A->getName() +
-                        "' can not be undefined in a subtraction expression");
-
-    uint32_t Value = A_SD->getAddress();
-    uint32_t Value2 = 0;
-
-    if (const MCSymbol *B = Target.getSymB()) {
-      MCSymbolData *B_SD = &Asm.getSymbolData(*B);
-
-      if (!B_SD->getFragment())
-        llvm_report_error("symbol '" + B->getName() +
-                          "' can not be undefined in a subtraction expression");
-
-      // Select the appropriate difference relocation type.
-      //
-      // Note that there is no longer any semantic difference between these two
-      // relocation types from the linkers point of view, this is done solely
-      // for pedantic compatibility with 'as'.
-      Type = A_SD->isExternal() ? RIT_Difference : RIT_LocalDifference;
-      Value2 = B_SD->getAddress();
-    }
-
-    MachRelocationEntry MRE;
-    MRE.Word0 = ((Address   <<  0) |
-                 (Type      << 24) |
-                 (Log2Size  << 28) |
-                 (IsPCRel   << 30) |
-                 RF_Scattered);
-    MRE.Word1 = Value;
-    Relocs.push_back(MRE);
-
-    if (Type == RIT_Difference || Type == RIT_LocalDifference) {
-      MachRelocationEntry MRE;
-      MRE.Word0 = ((0         <<  0) |
-                   (RIT_Pair  << 24) |
-                   (Log2Size  << 28) |
-                   (IsPCRel   << 30) |
-                   RF_Scattered);
-      MRE.Word1 = Value2;
-      Relocs.push_back(MRE);
-    }
-  }
-
-  void ComputeRelocationInfo(MCAssembler &Asm, MCDataFragment &Fragment,
-                             MCAsmFixup &Fixup,
-                             std::vector<MachRelocationEntry> &Relocs) {
-    unsigned IsPCRel = isFixupKindPCRel(Fixup.Kind);
-    unsigned Log2Size = getFixupKindLog2Size(Fixup.Kind);
-
-    // FIXME: Share layout object.
-    MCAsmLayout Layout(Asm);
-
-    // Evaluate the fixup; if the value was resolved, no relocation is needed.
-    MCValue Target;
-    if (Asm.EvaluateFixup(Layout, Fixup, &Fragment, Target, Fixup.FixedValue))
-      return;
-
-    // If this is a difference or a defined symbol plus an offset, then we need
-    // a scattered relocation entry.
-    uint32_t Offset = Target.getConstant();
-    if (IsPCRel)
-      Offset += 1 << Log2Size;
-    if (Target.getSymB() ||
-        (Target.getSymA() && !Target.getSymA()->isUndefined() &&
-         Offset))
-      return ComputeScatteredRelocationInfo(Asm, Fragment, Fixup, Target,
-                                            Relocs);
-
-    // See <reloc.h>.
-    uint32_t Address = Fragment.getOffset() + Fixup.Offset;
-    uint32_t Value = 0;
-    unsigned Index = 0;
-    unsigned IsExtern = 0;
-    unsigned Type = 0;
-
-    if (Target.isAbsolute()) { // constant
-      // SymbolNum of 0 indicates the absolute section.
-      //
-      // FIXME: Currently, these are never generated (see code below). I cannot
-      // find a case where they are actually emitted.
-      Type = RIT_Vanilla;
-      Value = 0;
-    } else {
-      const MCSymbol *Symbol = Target.getSymA();
-      MCSymbolData *SD = &Asm.getSymbolData(*Symbol);
-
-      if (Symbol->isUndefined()) {
-        IsExtern = 1;
-        Index = SD->getIndex();
-        Value = 0;
-      } else {
-        // The index is the section ordinal.
-        //
-        // FIXME: O(N)
-        Index = 1;
-        MCAssembler::iterator it = Asm.begin(), ie = Asm.end();
-        for (; it != ie; ++it, ++Index)
-          if (&*it == SD->getFragment()->getParent())
-            break;
-        assert(it != ie && "Unable to find section index!");
-        Value = SD->getAddress();
-      }
-
-      Type = RIT_Vanilla;
-    }
-
-    // struct relocation_info (8 bytes)
-    MachRelocationEntry MRE;
-    MRE.Word0 = Address;
-    MRE.Word1 = ((Index     <<  0) |
-                 (IsPCRel   << 24) |
-                 (Log2Size  << 25) |
-                 (IsExtern  << 27) |
-                 (Type      << 28));
-    Relocs.push_back(MRE);
-  }
-
-  void BindIndirectSymbols(MCAssembler &Asm) {
-    // This is the point where 'as' creates actual symbols for indirect symbols
-    // (in the following two passes). It would be easier for us to do this
-    // sooner when we see the attribute, but that makes getting the order in the
-    // symbol table much more complicated than it is worth.
-    //
-    // FIXME: Revisit this when the dust settles.
-
-    // Bind non lazy symbol pointers first.
-    for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(),
-           ie = Asm.indirect_symbol_end(); it != ie; ++it) {
-      // FIXME: cast<> support!
-      const MCSectionMachO &Section =
-        static_cast<const MCSectionMachO&>(it->SectionData->getSection());
-
-      if (Section.getType() != MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS)
-        continue;
-
-      Asm.getOrCreateSymbolData(*it->Symbol);
-    }
-
-    // Then lazy symbol pointers and symbol stubs.
-    for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(),
-           ie = Asm.indirect_symbol_end(); it != ie; ++it) {
-      // FIXME: cast<> support!
-      const MCSectionMachO &Section =
-        static_cast<const MCSectionMachO&>(it->SectionData->getSection());
-
-      if (Section.getType() != MCSectionMachO::S_LAZY_SYMBOL_POINTERS &&
-          Section.getType() != MCSectionMachO::S_SYMBOL_STUBS)
-        continue;
-
-      // Set the symbol type to undefined lazy, but only on construction.
-      //
-      // FIXME: Do not hardcode.
-      bool Created;
-      MCSymbolData &Entry = Asm.getOrCreateSymbolData(*it->Symbol, &Created);
-      if (Created)
-        Entry.setFlags(Entry.getFlags() | 0x0001);
-    }
-  }
-
-  /// ComputeSymbolTable - Compute the symbol table data
-  ///
-  /// \param StringTable [out] - The string table data.
-  /// \param StringIndexMap [out] - Map from symbol names to offsets in the
-  /// string table.
-  void ComputeSymbolTable(MCAssembler &Asm, SmallString<256> &StringTable,
-                          std::vector<MachSymbolData> &LocalSymbolData,
-                          std::vector<MachSymbolData> &ExternalSymbolData,
-                          std::vector<MachSymbolData> &UndefinedSymbolData) {
-    // Build section lookup table.
-    DenseMap<const MCSection*, uint8_t> SectionIndexMap;
-    unsigned Index = 1;
-    for (MCAssembler::iterator it = Asm.begin(),
-           ie = Asm.end(); it != ie; ++it, ++Index)
-      SectionIndexMap[&it->getSection()] = Index;
-    assert(Index <= 256 && "Too many sections!");
-
-    // Index 0 is always the empty string.
-    StringMap<uint64_t> StringIndexMap;
-    StringTable += '\x00';
-
-    // Build the symbol arrays and the string table, but only for non-local
-    // symbols.
-    //
-    // The particular order that we collect the symbols and create the string
-    // table, then sort the symbols is chosen to match 'as'. Even though it
-    // doesn't matter for correctness, this is important for letting us diff .o
-    // files.
-    for (MCAssembler::symbol_iterator it = Asm.symbol_begin(),
-           ie = Asm.symbol_end(); it != ie; ++it) {
-      const MCSymbol &Symbol = it->getSymbol();
-
-      // Ignore assembler temporaries.
-      if (it->getSymbol().isTemporary() &&
-          (!it->getFragment() ||
-           !Asm.getBackend().doesSectionRequireSymbols(
-             it->getFragment()->getParent()->getSection())))
-        continue;
-
-      if (!it->isExternal() && !Symbol.isUndefined())
-        continue;
-
-      uint64_t &Entry = StringIndexMap[Symbol.getName()];
-      if (!Entry) {
-        Entry = StringTable.size();
-        StringTable += Symbol.getName();
-        StringTable += '\x00';
-      }
-
-      MachSymbolData MSD;
-      MSD.SymbolData = it;
-      MSD.StringIndex = Entry;
-
-      if (Symbol.isUndefined()) {
-        MSD.SectionIndex = 0;
-        UndefinedSymbolData.push_back(MSD);
-      } else if (Symbol.isAbsolute()) {
-        MSD.SectionIndex = 0;
-        ExternalSymbolData.push_back(MSD);
-      } else {
-        MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection());
-        assert(MSD.SectionIndex && "Invalid section index!");
-        ExternalSymbolData.push_back(MSD);
-      }
-    }
-
-    // Now add the data for local symbols.
-    for (MCAssembler::symbol_iterator it = Asm.symbol_begin(),
-           ie = Asm.symbol_end(); it != ie; ++it) {
-      const MCSymbol &Symbol = it->getSymbol();
-
-      // Ignore assembler temporaries.
-      if (it->getSymbol().isTemporary() &&
-          (!it->getFragment() ||
-           !Asm.getBackend().doesSectionRequireSymbols(
-             it->getFragment()->getParent()->getSection())))
-        continue;
-
-      if (it->isExternal() || Symbol.isUndefined())
-        continue;
-
-      uint64_t &Entry = StringIndexMap[Symbol.getName()];
-      if (!Entry) {
-        Entry = StringTable.size();
-        StringTable += Symbol.getName();
-        StringTable += '\x00';
-      }
-
-      MachSymbolData MSD;
-      MSD.SymbolData = it;
-      MSD.StringIndex = Entry;
-
-      if (Symbol.isAbsolute()) {
-        MSD.SectionIndex = 0;
-        LocalSymbolData.push_back(MSD);
-      } else {
-        MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection());
-        assert(MSD.SectionIndex && "Invalid section index!");
-        LocalSymbolData.push_back(MSD);
-      }
-    }
-
-    // External and undefined symbols are required to be in lexicographic order.
-    std::sort(ExternalSymbolData.begin(), ExternalSymbolData.end());
-    std::sort(UndefinedSymbolData.begin(), UndefinedSymbolData.end());
-
-    // Set the symbol indices.
-    Index = 0;
-    for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i)
-      LocalSymbolData[i].SymbolData->setIndex(Index++);
-    for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i)
-      ExternalSymbolData[i].SymbolData->setIndex(Index++);
-    for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i)
-      UndefinedSymbolData[i].SymbolData->setIndex(Index++);
-
-    // The string table is padded to a multiple of 4.
-    while (StringTable.size() % 4)
-      StringTable += '\x00';
-  }
-
-  void WriteObject(MCAssembler &Asm) {
-    unsigned NumSections = Asm.size();
-
-    // Create symbol data for any indirect symbols.
-    BindIndirectSymbols(Asm);
-
-    // Compute symbol table information.
-    SmallString<256> StringTable;
-    std::vector<MachSymbolData> LocalSymbolData;
-    std::vector<MachSymbolData> ExternalSymbolData;
-    std::vector<MachSymbolData> UndefinedSymbolData;
-    unsigned NumSymbols = Asm.symbol_size();
-
-    // No symbol table command is written if there are no symbols.
-    if (NumSymbols)
-      ComputeSymbolTable(Asm, StringTable, LocalSymbolData, ExternalSymbolData,
-                         UndefinedSymbolData);
-
-    // The section data starts after the header, the segment load command (and
-    // section headers) and the symbol table.
-    unsigned NumLoadCommands = 1;
-    uint64_t LoadCommandsSize = Is64Bit ?
-      SegmentLoadCommand64Size + NumSections * Section64Size :
-      SegmentLoadCommand32Size + NumSections * Section32Size;
-
-    // Add the symbol table load command sizes, if used.
-    if (NumSymbols) {
-      NumLoadCommands += 2;
-      LoadCommandsSize += SymtabLoadCommandSize + DysymtabLoadCommandSize;
-    }
-
-    // Compute the total size of the section data, as well as its file size and
-    // vm size.
-    uint64_t SectionDataStart = (Is64Bit ? Header64Size : Header32Size)
-      + LoadCommandsSize;
-    uint64_t SectionDataSize = 0;
-    uint64_t SectionDataFileSize = 0;
-    uint64_t VMSize = 0;
-    for (MCAssembler::iterator it = Asm.begin(),
-           ie = Asm.end(); it != ie; ++it) {
-      MCSectionData &SD = *it;
-
-      VMSize = std::max(VMSize, SD.getAddress() + SD.getSize());
-
-      if (isVirtualSection(SD.getSection()))
-        continue;
-
-      SectionDataSize = std::max(SectionDataSize,
-                                 SD.getAddress() + SD.getSize());
-      SectionDataFileSize = std::max(SectionDataFileSize,
-                                     SD.getAddress() + SD.getFileSize());
-    }
-
-    // The section data is padded to 4 bytes.
-    //
-    // FIXME: Is this machine dependent?
-    unsigned SectionDataPadding = OffsetToAlignment(SectionDataFileSize, 4);
-    SectionDataFileSize += SectionDataPadding;
-
-    // Write the prolog, starting with the header and load command...
-    WriteHeader(NumLoadCommands, LoadCommandsSize,
-                Asm.getSubsectionsViaSymbols());
-    WriteSegmentLoadCommand(NumSections, VMSize,
-                            SectionDataStart, SectionDataSize);
-
-    // ... and then the section headers.
-    //
-    // We also compute the section relocations while we do this. Note that
-    // computing relocation info will also update the fixup to have the correct
-    // value; this will overwrite the appropriate data in the fragment when it
-    // is written.
-    std::vector<MachRelocationEntry> RelocInfos;
-    uint64_t RelocTableEnd = SectionDataStart + SectionDataFileSize;
-    for (MCAssembler::iterator it = Asm.begin(),
-           ie = Asm.end(); it != ie; ++it) {
-      MCSectionData &SD = *it;
-
-      // The assembler writes relocations in the reverse order they were seen.
-      //
-      // FIXME: It is probably more complicated than this.
-      unsigned NumRelocsStart = RelocInfos.size();
-      for (MCSectionData::reverse_iterator it2 = SD.rbegin(),
-             ie2 = SD.rend(); it2 != ie2; ++it2)
-        if (MCDataFragment *DF = dyn_cast<MCDataFragment>(&*it2))
-          for (unsigned i = 0, e = DF->fixup_size(); i != e; ++i)
-            ComputeRelocationInfo(Asm, *DF, DF->getFixups()[e - i - 1],
-                                  RelocInfos);
-
-      unsigned NumRelocs = RelocInfos.size() - NumRelocsStart;
-      uint64_t SectionStart = SectionDataStart + SD.getAddress();
-      WriteSection(SD, SectionStart, RelocTableEnd, NumRelocs);
-      RelocTableEnd += NumRelocs * RelocationInfoSize;
-    }
-
-    // Write the symbol table load command, if used.
-    if (NumSymbols) {
-      unsigned FirstLocalSymbol = 0;
-      unsigned NumLocalSymbols = LocalSymbolData.size();
-      unsigned FirstExternalSymbol = FirstLocalSymbol + NumLocalSymbols;
-      unsigned NumExternalSymbols = ExternalSymbolData.size();
-      unsigned FirstUndefinedSymbol = FirstExternalSymbol + NumExternalSymbols;
-      unsigned NumUndefinedSymbols = UndefinedSymbolData.size();
-      unsigned NumIndirectSymbols = Asm.indirect_symbol_size();
-      unsigned NumSymTabSymbols =
-        NumLocalSymbols + NumExternalSymbols + NumUndefinedSymbols;
-      uint64_t IndirectSymbolSize = NumIndirectSymbols * 4;
-      uint64_t IndirectSymbolOffset = 0;
-
-      // If used, the indirect symbols are written after the section data.
-      if (NumIndirectSymbols)
-        IndirectSymbolOffset = RelocTableEnd;
-
-      // The symbol table is written after the indirect symbol data.
-      uint64_t SymbolTableOffset = RelocTableEnd + IndirectSymbolSize;
-
-      // The string table is written after symbol table.
-      uint64_t StringTableOffset =
-        SymbolTableOffset + NumSymTabSymbols * (Is64Bit ? Nlist64Size :
-                                                Nlist32Size);
-      WriteSymtabLoadCommand(SymbolTableOffset, NumSymTabSymbols,
-                             StringTableOffset, StringTable.size());
-
-      WriteDysymtabLoadCommand(FirstLocalSymbol, NumLocalSymbols,
-                               FirstExternalSymbol, NumExternalSymbols,
-                               FirstUndefinedSymbol, NumUndefinedSymbols,
-                               IndirectSymbolOffset, NumIndirectSymbols);
-    }
-
-    // Write the actual section data.
-    for (MCAssembler::iterator it = Asm.begin(), ie = Asm.end(); it != ie; ++it)
-      WriteFileData(OS, *it, *this);
-
-    // Write the extra padding.
-    WriteZeros(SectionDataPadding);
-
-    // Write the relocation entries.
-    for (unsigned i = 0, e = RelocInfos.size(); i != e; ++i) {
-      Write32(RelocInfos[i].Word0);
-      Write32(RelocInfos[i].Word1);
-    }
-
-    // Write the symbol table data, if used.
-    if (NumSymbols) {
-      // Write the indirect symbol entries.
-      for (MCAssembler::indirect_symbol_iterator
-             it = Asm.indirect_symbol_begin(),
-             ie = Asm.indirect_symbol_end(); it != ie; ++it) {
-        // Indirect symbols in the non lazy symbol pointer section have some
-        // special handling.
-        const MCSectionMachO &Section =
-          static_cast<const MCSectionMachO&>(it->SectionData->getSection());
-        if (Section.getType() == MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS) {
-          // If this symbol is defined and internal, mark it as such.
-          if (it->Symbol->isDefined() &&
-              !Asm.getSymbolData(*it->Symbol).isExternal()) {
-            uint32_t Flags = ISF_Local;
-            if (it->Symbol->isAbsolute())
-              Flags |= ISF_Absolute;
-            Write32(Flags);
-            continue;
-          }
-        }
-
-        Write32(Asm.getSymbolData(*it->Symbol).getIndex());
-      }
-
-      // FIXME: Check that offsets match computed ones.
-
-      // Write the symbol table entries.
-      for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i)
-        WriteNlist(LocalSymbolData[i]);
-      for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i)
-        WriteNlist(ExternalSymbolData[i]);
-      for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i)
-        WriteNlist(UndefinedSymbolData[i]);
-
-      // Write the string table.
-      OS << StringTable.str();
-    }
-  }
-
-  void ApplyFixup(const MCAsmFixup &Fixup, MCDataFragment &DF) {
-    unsigned Size = 1 << getFixupKindLog2Size(Fixup.Kind);
-
-    // FIXME: Endianness assumption.
-    assert(Fixup.Offset + Size <= DF.getContents().size() &&
-           "Invalid fixup offset!");
-    for (unsigned i = 0; i != Size; ++i)
-      DF.getContents()[Fixup.Offset + i] = uint8_t(Fixup.FixedValue >> (i * 8));
-  }
-};
-
 /* *** */
 
 MCFragment::MCFragment() : Kind(FragmentType(~0)) {
@@ -1008,14 +93,149 @@ MCSymbolData::MCSymbolData(const MCSymbol &_Symbol, MCFragment *_Fragment,
 /* *** */
 
 MCAssembler::MCAssembler(MCContext &_Context, TargetAsmBackend &_Backend,
-                         raw_ostream &_OS)
-  : Context(_Context), Backend(_Backend), OS(_OS), SubsectionsViaSymbols(false)
+                         MCCodeEmitter &_Emitter, raw_ostream &_OS)
+  : Context(_Context), Backend(_Backend), Emitter(_Emitter),
+    OS(_OS), SubsectionsViaSymbols(false)
 {
 }
 
 MCAssembler::~MCAssembler() {
 }
 
+static bool isScatteredFixupFullyResolvedSimple(const MCAssembler &Asm,
+                                                const MCAsmFixup &Fixup,
+                                                const MCDataFragment *DF,
+                                                const MCValue Target,
+                                                const MCSection *BaseSection) {
+  // The effective fixup address is
+  //     addr(atom(A)) + offset(A)
+  //   - addr(atom(B)) - offset(B)
+  //   - addr(<base symbol>) + <fixup offset from base symbol>
+  // and the offsets are not relocatable, so the fixup is fully resolved when
+  //  addr(atom(A)) - addr(atom(B)) - addr(<base symbol>)) == 0.
+  //
+  // The simple (Darwin, except on x86_64) way of dealing with this was to
+  // assume that any reference to a temporary symbol *must* be a temporary
+  // symbol in the same atom, unless the sections differ. Therefore, any PCrel
+  // relocation to a temporary symbol (in the same section) is fully
+  // resolved. This also works in conjunction with absolutized .set, which
+  // requires the compiler to use .set to absolutize the differences between
+  // symbols which the compiler knows to be assembly time constants, so we don't
+  // need to worry about consider symbol differences fully resolved.
+
+  // Non-relative fixups are only resolved if constant.
+  if (!BaseSection)
+    return Target.isAbsolute();
+
+  // Otherwise, relative fixups are only resolved if not a difference and the
+  // target is a temporary in the same section.
+  if (Target.isAbsolute() || Target.getSymB())
+    return false;
+
+  const MCSymbol *A = &Target.getSymA()->getSymbol();
+  if (!A->isTemporary() || !A->isInSection() ||
+      &A->getSection() != BaseSection)
+    return false;
+
+  return true;
+}
+
+static bool isScatteredFixupFullyResolved(const MCAssembler &Asm,
+                                          const MCAsmFixup &Fixup,
+                                          const MCDataFragment *DF,
+                                          const MCValue Target,
+                                          const MCSymbolData *BaseSymbol) {
+  // The effective fixup address is
+  //     addr(atom(A)) + offset(A)
+  //   - addr(atom(B)) - offset(B)
+  //   - addr(BaseSymbol) + <fixup offset from base symbol>
+  // and the offsets are not relocatable, so the fixup is fully resolved when
+  //  addr(atom(A)) - addr(atom(B)) - addr(BaseSymbol) == 0.
+  //
+  // Note that "false" is almost always conservatively correct (it means we emit
+  // a relocation which is unnecessary), except when it would force us to emit a
+  // relocation which the target cannot encode.
+
+  const MCSymbolData *A_Base = 0, *B_Base = 0;
+  if (const MCSymbolRefExpr *A = Target.getSymA()) {
+    // Modified symbol references cannot be resolved.
+    if (A->getKind() != MCSymbolRefExpr::VK_None)
+      return false;
+
+    A_Base = Asm.getAtom(&Asm.getSymbolData(A->getSymbol()));
+    if (!A_Base)
+      return false;
+  }
+
+  if (const MCSymbolRefExpr *B = Target.getSymB()) {
+    // Modified symbol references cannot be resolved.
+    if (B->getKind() != MCSymbolRefExpr::VK_None)
+      return false;
+
+    B_Base = Asm.getAtom(&Asm.getSymbolData(B->getSymbol()));
+    if (!B_Base)
+      return false;
+  }
+
+  // If there is no base, A and B have to be the same atom for this fixup to be
+  // fully resolved.
+  if (!BaseSymbol)
+    return A_Base == B_Base;
+
+  // Otherwise, B must be missing and A must be the base.
+  return !B_Base && BaseSymbol == A_Base;
+}
+
+bool MCAssembler::isSymbolLinkerVisible(const MCSymbolData *SD) const {
+  // Non-temporary labels should always be visible to the linker.
+  if (!SD->getSymbol().isTemporary())
+    return true;
+
+  // Absolute temporary labels are never visible.
+  if (!SD->getFragment())
+    return false;
+
+  // Otherwise, check if the section requires symbols even for temporary labels.
+  return getBackend().doesSectionRequireSymbols(
+    SD->getFragment()->getParent()->getSection());
+}
+
+const MCSymbolData *MCAssembler::getAtomForAddress(const MCSectionData *Section,
+                                                   uint64_t Address) const {
+  const MCSymbolData *Best = 0;
+  for (MCAssembler::const_symbol_iterator it = symbol_begin(),
+         ie = symbol_end(); it != ie; ++it) {
+    // Ignore non-linker visible symbols.
+    if (!isSymbolLinkerVisible(it))
+      continue;
+
+    // Ignore symbols not in the same section.
+    if (!it->getFragment() || it->getFragment()->getParent() != Section)
+      continue;
+
+    // Otherwise, find the closest symbol preceding this address (ties are
+    // resolved in favor of the last defined symbol).
+    if (it->getAddress() <= Address &&
+        (!Best || it->getAddress() >= Best->getAddress()))
+      Best = it;
+  }
+
+  return Best;
+}
+
+const MCSymbolData *MCAssembler::getAtom(const MCSymbolData *SD) const {
+  // Linker visible symbols define atoms.
+  if (isSymbolLinkerVisible(SD))
+    return SD;
+
+  // Absolute and undefined symbols have no defining atom.
+  if (!SD->getFragment())
+    return 0;
+
+  // Otherwise, search by address.
+  return getAtomForAddress(SD->getFragment()->getParent(), SD->getAddress());
+}
+
 bool MCAssembler::EvaluateFixup(const MCAsmLayout &Layout, MCAsmFixup &Fixup,
                                 MCDataFragment *DF,
                                 MCValue &Target, uint64_t &Value) const {
@@ -1028,34 +248,47 @@ bool MCAssembler::EvaluateFixup(const MCAsmLayout &Layout, MCAsmFixup &Fixup,
 
   Value = Target.getConstant();
 
-  // FIXME: This "resolved" check isn't quite right. The assumption is that if
-  // we have a PCrel access to a temporary, then that temporary is in the same
-  // atom, and so the value is resolved. We need explicit atom's to implement
-  // this more precisely.
-  bool IsResolved = true, IsPCRel = isFixupKindPCRel(Fixup.Kind);
-  if (const MCSymbol *Symbol = Target.getSymA()) {
-    if (Symbol->isDefined())
-      Value += getSymbolData(*Symbol).getAddress();
+  bool IsPCRel =
+    Emitter.getFixupKindInfo(Fixup.Kind).Flags & MCFixupKindInfo::FKF_IsPCRel;
+  bool IsResolved = true;
+  if (const MCSymbolRefExpr *A = Target.getSymA()) {
+    if (A->getSymbol().isDefined())
+      Value += getSymbolData(A->getSymbol()).getAddress();
     else
       IsResolved = false;
-
-    // With scattered symbols, we assume anything that isn't a PCrel temporary
-    // access can have an arbitrary value.
-    if (getBackend().hasScatteredSymbols() &&
-        (!IsPCRel || !Symbol->isTemporary()))
-      IsResolved = false;
   }
-  if (const MCSymbol *Symbol = Target.getSymB()) {
-    if (Symbol->isDefined())
-      Value -= getSymbolData(*Symbol).getAddress();
+  if (const MCSymbolRefExpr *B = Target.getSymB()) {
+    if (B->getSymbol().isDefined())
+      Value -= getSymbolData(B->getSymbol()).getAddress();
     else
       IsResolved = false;
+  }
 
-    // With scattered symbols, we assume anything that isn't a PCrel temporary
-    // access can have an arbitrary value.
-    if (getBackend().hasScatteredSymbols() &&
-        (!IsPCRel || !Symbol->isTemporary()))
-      IsResolved = false;
+  // If we are using scattered symbols, determine whether this value is actually
+  // resolved; scattering may cause atoms to move.
+  if (IsResolved && getBackend().hasScatteredSymbols()) {
+    if (getBackend().hasReliableSymbolDifference()) {
+      // If this is a PCrel relocation, find the base atom (identified by its
+      // symbol) that the fixup value is relative to.
+      const MCSymbolData *BaseSymbol = 0;
+      if (IsPCRel) {
+        BaseSymbol = getAtomForAddress(
+          DF->getParent(), DF->getAddress() + Fixup.Offset);
+        if (!BaseSymbol)
+          IsResolved = false;
+      }
+
+      if (IsResolved)
+        IsResolved = isScatteredFixupFullyResolved(*this, Fixup, DF, Target,
+                                                   BaseSymbol);
+    } else {
+      const MCSection *BaseSection = 0;
+      if (IsPCRel)
+        BaseSection = &DF->getParent()->getSection();
+
+      IsResolved = isScatteredFixupFullyResolvedSimple(*this, Fixup, DF, Target,
+                                                       BaseSection);
+    }
   }
 
   if (IsPCRel)
@@ -1127,7 +360,7 @@ void MCAssembler::LayoutSection(MCSectionData &SD) {
 
   // Set the section sizes.
   SD.setSize(Address - SD.getAddress());
-  if (isVirtualSection(SD.getSection()))
+  if (getBackend().isVirtualSection(SD.getSection()))
     SD.setFileSize(0);
   else
     SD.setFileSize(Address - SD.getAddress());
@@ -1138,7 +371,7 @@ void MCAssembler::LayoutSection(MCSectionData &SD) {
 /// the \arg Count is more than the maximum optimal nops.
 ///
 /// FIXME this is X86 32-bit specific and should move to a better place.
-static uint64_t WriteNopData(uint64_t Count, MachObjectWriter &MOW) {
+static uint64_t WriteNopData(uint64_t Count, MCObjectWriter *OW) {
   static const uint8_t Nops[16][16] = {
     // nop
     {0x90},
@@ -1186,15 +419,14 @@ static uint64_t WriteNopData(uint64_t Count, MachObjectWriter &MOW) {
     return 0;
 
   for (uint64_t i = 0; i < Count; i++)
-    MOW.Write8 (uint8_t(Nops[Count - 1][i]));
+    OW->Write8(uint8_t(Nops[Count - 1][i]));
 
   return Count;
 }
 
-/// WriteFileData - Write the \arg F data to the output file.
-static void WriteFileData(raw_ostream &OS, const MCFragment &F,
-                          MachObjectWriter &MOW) {
-  uint64_t Start = OS.tell();
+/// WriteFragmentData - Write the \arg F data to the output file.
+static void WriteFragmentData(const MCFragment &F, MCObjectWriter *OW) {
+  uint64_t Start = OW->getStream().tell();
   (void) Start;
 
   ++EmittedFragments;
@@ -1218,7 +450,7 @@ static void WriteFileData(raw_ostream &OS, const MCFragment &F,
     // the Count bytes.  Then if that did not fill any bytes or there are any
     // bytes left to fill use the the Value and ValueSize to fill the rest.
     if (AF.getEmitNops()) {
-      uint64_t NopByteCount = WriteNopData(Count, MOW);
+      uint64_t NopByteCount = WriteNopData(Count, OW);
       Count -= NopByteCount;
     }
 
@@ -1226,26 +458,17 @@ static void WriteFileData(raw_ostream &OS, const MCFragment &F,
       switch (AF.getValueSize()) {
       default:
         assert(0 && "Invalid size!");
-      case 1: MOW.Write8 (uint8_t (AF.getValue())); break;
-      case 2: MOW.Write16(uint16_t(AF.getValue())); break;
-      case 4: MOW.Write32(uint32_t(AF.getValue())); break;
-      case 8: MOW.Write64(uint64_t(AF.getValue())); break;
+      case 1: OW->Write8 (uint8_t (AF.getValue())); break;
+      case 2: OW->Write16(uint16_t(AF.getValue())); break;
+      case 4: OW->Write32(uint32_t(AF.getValue())); break;
+      case 8: OW->Write64(uint64_t(AF.getValue())); break;
       }
     }
     break;
   }
 
   case MCFragment::FT_Data: {
-    MCDataFragment &DF = cast<MCDataFragment>(F);
-
-    // Apply the fixups.
-    //
-    // FIXME: Move elsewhere.
-    for (MCDataFragment::const_fixup_iterator it = DF.fixup_begin(),
-           ie = DF.fixup_end(); it != ie; ++it)
-      MOW.ApplyFixup(*it, DF);
-
-    OS << cast<MCDataFragment>(F).getContents().str();
+    OW->WriteBytes(cast<MCDataFragment>(F).getContents().str());
     break;
   }
 
@@ -1255,10 +478,10 @@ static void WriteFileData(raw_ostream &OS, const MCFragment &F,
       switch (FF.getValueSize()) {
       default:
         assert(0 && "Invalid size!");
-      case 1: MOW.Write8 (uint8_t (FF.getValue())); break;
-      case 2: MOW.Write16(uint16_t(FF.getValue())); break;
-      case 4: MOW.Write32(uint32_t(FF.getValue())); break;
-      case 8: MOW.Write64(uint64_t(FF.getValue())); break;
+      case 1: OW->Write8 (uint8_t (FF.getValue())); break;
+      case 2: OW->Write16(uint16_t(FF.getValue())); break;
+      case 4: OW->Write32(uint32_t(FF.getValue())); break;
+      case 8: OW->Write64(uint64_t(FF.getValue())); break;
       }
     }
     break;
@@ -1268,7 +491,7 @@ static void WriteFileData(raw_ostream &OS, const MCFragment &F,
     MCOrgFragment &OF = cast<MCOrgFragment>(F);
 
     for (uint64_t i = 0, e = OF.getFileSize(); i != e; ++i)
-      MOW.Write8(uint8_t(OF.getValue()));
+      OW->Write8(uint8_t(OF.getValue()));
 
     break;
   }
@@ -1279,30 +502,29 @@ static void WriteFileData(raw_ostream &OS, const MCFragment &F,
   }
   }
 
-  assert(OS.tell() - Start == F.getFileSize());
+  assert(OW->getStream().tell() - Start == F.getFileSize());
 }
 
-/// WriteFileData - Write the \arg SD data to the output file.
-static void WriteFileData(raw_ostream &OS, const MCSectionData &SD,
-                          MachObjectWriter &MOW) {
+void MCAssembler::WriteSectionData(const MCSectionData *SD,
+                                   MCObjectWriter *OW) const {
   // Ignore virtual sections.
-  if (isVirtualSection(SD.getSection())) {
-    assert(SD.getFileSize() == 0);
+  if (getBackend().isVirtualSection(SD->getSection())) {
+    assert(SD->getFileSize() == 0);
     return;
   }
 
-  uint64_t Start = OS.tell();
+  uint64_t Start = OW->getStream().tell();
   (void) Start;
 
-  for (MCSectionData::const_iterator it = SD.begin(),
-         ie = SD.end(); it != ie; ++it)
-    WriteFileData(OS, *it, MOW);
+  for (MCSectionData::const_iterator it = SD->begin(),
+         ie = SD->end(); it != ie; ++it)
+    WriteFragmentData(*it, OW);
 
   // Add section padding.
-  assert(SD.getFileSize() >= SD.getSize() && "Invalid section sizes!");
-  MOW.WriteZeros(SD.getFileSize() - SD.getSize());
+  assert(SD->getFileSize() >= SD->getSize() && "Invalid section sizes!");
+  OW->WriteZeros(SD->getFileSize() - SD->getSize());
 
-  assert(OS.tell() - Start == SD.getFileSize());
+  assert(OW->getStream().tell() - Start == SD->getFileSize());
 }
 
 void MCAssembler::Finish() {
@@ -1318,13 +540,47 @@ void MCAssembler::Finish() {
       llvm::errs() << "assembler backend - post-layout\n--\n";
       dump(); });
 
-  // Write the object file.
-  //
   // FIXME: Factor out MCObjectWriter.
-  bool Is64Bit = StringRef(getBackend().getTarget().getName()) == "x86-64";
-  MachObjectWriter MOW(OS, Is64Bit);
-  MOW.WriteObject(*this);
+  llvm::OwningPtr<MCObjectWriter> Writer(getBackend().createObjectWriter(OS));
+  if (!Writer)
+    llvm_report_error("unable to create object writer!");
+
+  // Allow the object writer a chance to perform post-layout binding (for
+  // example, to set the index fields in the symbol data).
+  Writer->ExecutePostLayoutBinding(*this);
+
+  // Evaluate and apply the fixups, generating relocation entries as necessary.
+  //
+  // FIXME: Share layout object.
+  MCAsmLayout Layout(*this);
+  for (MCAssembler::iterator it = begin(), ie = end(); it != ie; ++it) {
+    for (MCSectionData::iterator it2 = it->begin(),
+           ie2 = it->end(); it2 != ie2; ++it2) {
+      MCDataFragment *DF = dyn_cast<MCDataFragment>(it2);
+      if (!DF)
+        continue;
+
+      for (MCDataFragment::fixup_iterator it3 = DF->fixup_begin(),
+             ie3 = DF->fixup_end(); it3 != ie3; ++it3) {
+        MCAsmFixup &Fixup = *it3;
 
+        // Evaluate the fixup.
+        MCValue Target;
+        uint64_t FixedValue;
+        if (!EvaluateFixup(Layout, Fixup, DF, Target, FixedValue)) {
+          // The fixup was unresolved, we need a relocation. Inform the object
+          // writer of the relocation, and give it an opportunity to adjust the
+          // fixup value if need be.
+          Writer->RecordRelocation(*this, *DF, Fixup, Target, FixedValue);
+        }
+
+        getBackend().ApplyFixup(Fixup, *DF, FixedValue);
+      }
+    }
+  }
+
+  // Write the object file.
+  Writer->WriteObject(*this);
   OS.flush();
 }
 
@@ -1354,7 +610,7 @@ bool MCAssembler::LayoutOnce() {
     MCSectionData &SD = *it;
 
     // Skip virtual sections.
-    if (isVirtualSection(SD.getSection()))
+    if (getBackend().isVirtualSection(SD.getSection()))
       continue;
 
     // Align this section if necessary by adding padding bytes to the previous
@@ -1377,7 +633,7 @@ bool MCAssembler::LayoutOnce() {
   for (iterator it = begin(), ie = end(); it != ie; ++it) {
     MCSectionData &SD = *it;
 
-    if (!isVirtualSection(SD.getSection()))
+    if (!getBackend().isVirtualSection(SD.getSection()))
       continue;
 
     // Align this section if necessary by adding padding bytes to the previous
diff --git a/lib/MC/MCCodeEmitter.cpp b/lib/MC/MCCodeEmitter.cpp
index accb06c..d513237 100644
--- a/lib/MC/MCCodeEmitter.cpp
+++ b/lib/MC/MCCodeEmitter.cpp
@@ -19,10 +19,10 @@ MCCodeEmitter::~MCCodeEmitter() {
 
 const MCFixupKindInfo &MCCodeEmitter::getFixupKindInfo(MCFixupKind Kind) const {
   static const MCFixupKindInfo Builtins[] = {
-    { "FK_Data_1", 0, 8 },
-    { "FK_Data_2", 0, 16 },
-    { "FK_Data_4", 0, 32 },
-    { "FK_Data_8", 0, 64 }
+    { "FK_Data_1", 0, 8, 0 },
+    { "FK_Data_2", 0, 16, 0 },
+    { "FK_Data_4", 0, 32, 0 },
+    { "FK_Data_8", 0, 64, 0 }
   };
   
   assert(Kind <= 3 && "Unknown fixup kind");
diff --git a/lib/MC/MCContext.cpp b/lib/MC/MCContext.cpp
index 70c89a2..37e8282 100644
--- a/lib/MC/MCContext.cpp
+++ b/lib/MC/MCContext.cpp
@@ -49,17 +49,6 @@ MCSymbol *MCContext::CreateTempSymbol() {
                                     "tmp" + Twine(NextUniqueID++));
 }
 
-
-MCSymbol *MCContext::GetOrCreateTemporarySymbol(StringRef Name) {
-  // If there is no name, create a new anonymous symbol.
-  // FIXME: Remove this.  This form of the method should always take a name.
-  if (Name.empty())
-    return GetOrCreateTemporarySymbol(Twine(MAI.getPrivateGlobalPrefix()) +
-                                      "tmp" + Twine(NextUniqueID++));
-  
-  return GetOrCreateSymbol(Name, true);
-}
-
 MCSymbol *MCContext::GetOrCreateTemporarySymbol(const Twine &Name) {
   SmallString<128> NameSV;
   Name.toVector(NameSV);
diff --git a/lib/MC/MCExpr.cpp b/lib/MC/MCExpr.cpp
index a2ed20b..2759944 100644
--- a/lib/MC/MCExpr.cpp
+++ b/lib/MC/MCExpr.cpp
@@ -30,7 +30,7 @@ void MCExpr::print(raw_ostream &OS) const {
   case MCExpr::SymbolRef: {
     const MCSymbolRefExpr &SRE = cast<MCSymbolRefExpr>(*this);
     const MCSymbol &Sym = SRE.getSymbol();
-    
+
     // Parenthesize names that start with $ so that they don't look like
     // absolute names.
     if (Sym.getName()[0] == '$')
@@ -59,14 +59,14 @@ void MCExpr::print(raw_ostream &OS) const {
 
   case MCExpr::Binary: {
     const MCBinaryExpr &BE = cast<MCBinaryExpr>(*this);
-    
+
     // Only print parens around the LHS if it is non-trivial.
     if (isa<MCConstantExpr>(BE.getLHS()) || isa<MCSymbolRefExpr>(BE.getLHS())) {
       OS << *BE.getLHS();
     } else {
       OS << '(' << *BE.getLHS() << ')';
     }
-    
+
     switch (BE.getOpcode()) {
     default: assert(0 && "Invalid opcode!");
     case MCBinaryExpr::Add:
@@ -77,7 +77,7 @@ void MCExpr::print(raw_ostream &OS) const {
           return;
         }
       }
-        
+
       OS <<  '+';
       break;
     case MCBinaryExpr::And:  OS <<  '&'; break;
@@ -98,7 +98,7 @@ void MCExpr::print(raw_ostream &OS) const {
     case MCBinaryExpr::Sub:  OS <<  '-'; break;
     case MCBinaryExpr::Xor:  OS <<  '^'; break;
     }
-    
+
     // Only print parens around the LHS if it is non-trivial.
     if (isa<MCConstantExpr>(BE.getRHS()) || isa<MCSymbolRefExpr>(BE.getRHS())) {
       OS << *BE.getRHS();
@@ -193,7 +193,7 @@ void MCTargetExpr::Anchor() {}
 
 bool MCExpr::EvaluateAsAbsolute(int64_t &Res, const MCAsmLayout *Layout) const {
   MCValue Value;
-  
+
   if (!EvaluateAsRelocatable(Value, Layout) || !Value.isAbsolute())
     return false;
 
@@ -201,16 +201,16 @@ bool MCExpr::EvaluateAsAbsolute(int64_t &Res, const MCAsmLayout *Layout) const {
   return true;
 }
 
-static bool EvaluateSymbolicAdd(const MCValue &LHS, const MCSymbol *RHS_A, 
-                                const MCSymbol *RHS_B, int64_t RHS_Cst,
+static bool EvaluateSymbolicAdd(const MCValue &LHS,const MCSymbolRefExpr *RHS_A,
+                                const MCSymbolRefExpr *RHS_B, int64_t RHS_Cst,
                                 MCValue &Res) {
   // We can't add or subtract two symbols.
   if ((LHS.getSymA() && RHS_A) ||
       (LHS.getSymB() && RHS_B))
     return false;
 
-  const MCSymbol *A = LHS.getSymA() ? LHS.getSymA() : RHS_A;
-  const MCSymbol *B = LHS.getSymB() ? LHS.getSymB() : RHS_B;
+  const MCSymbolRefExpr *A = LHS.getSymA() ? LHS.getSymA() : RHS_A;
+  const MCSymbolRefExpr *B = LHS.getSymB() ? LHS.getSymB() : RHS_B;
   if (B) {
     // If we have a negated symbol, then we must have also have a non-negated
     // symbol in order to encode the expression. We can do this check later to
@@ -228,13 +228,14 @@ bool MCExpr::EvaluateAsRelocatable(MCValue &Res,
   switch (getKind()) {
   case Target:
     return cast<MCTargetExpr>(this)->EvaluateAsRelocatableImpl(Res, Layout);
-      
+
   case Constant:
     Res = MCValue::get(cast<MCConstantExpr>(this)->getValue());
     return true;
 
   case SymbolRef: {
-    const MCSymbol &Sym = cast<MCSymbolRefExpr>(this)->getSymbol();
+    const MCSymbolRefExpr *SRE = cast<MCSymbolRefExpr>(this);
+    const MCSymbol &Sym = SRE->getSymbol();
 
     // Evaluate recursively if this is a variable.
     if (Sym.isVariable()) {
@@ -245,9 +246,12 @@ bool MCExpr::EvaluateAsRelocatable(MCValue &Res,
       // layout object and the target requests it.
       if (Layout && Res.getSymB() &&
           Layout->getAssembler().getBackend().hasAbsolutizedSet() &&
-          Res.getSymA()->isDefined() && Res.getSymB()->isDefined()) {
-        MCSymbolData &A = Layout->getAssembler().getSymbolData(*Res.getSymA());
-        MCSymbolData &B = Layout->getAssembler().getSymbolData(*Res.getSymB());
+          Res.getSymA()->getSymbol().isDefined() &&
+          Res.getSymB()->getSymbol().isDefined()) {
+        MCSymbolData &A =
+          Layout->getAssembler().getSymbolData(Res.getSymA()->getSymbol());
+        MCSymbolData &B =
+          Layout->getAssembler().getSymbolData(Res.getSymB()->getSymbol());
         Res = MCValue::get(+ A.getFragment()->getAddress() + A.getOffset()
                            - B.getFragment()->getAddress() - B.getOffset()
                            + Res.getConstant());
@@ -256,7 +260,7 @@ bool MCExpr::EvaluateAsRelocatable(MCValue &Res,
       return true;
     }
 
-    Res = MCValue::get(&Sym, 0, 0);
+    Res = MCValue::get(SRE, 0, 0);
     return true;
   }
 
@@ -277,13 +281,13 @@ bool MCExpr::EvaluateAsRelocatable(MCValue &Res,
       /// -(a - b + const) ==> (b - a - const)
       if (Value.getSymA() && !Value.getSymB())
         return false;
-      Res = MCValue::get(Value.getSymB(), Value.getSymA(), 
-                         -Value.getConstant()); 
+      Res = MCValue::get(Value.getSymB(), Value.getSymA(),
+                         -Value.getConstant());
       break;
     case MCUnaryExpr::Not:
       if (!Value.isAbsolute())
         return false;
-      Res = MCValue::get(~Value.getConstant()); 
+      Res = MCValue::get(~Value.getConstant());
       break;
     case MCUnaryExpr::Plus:
       Res = Value;
@@ -296,7 +300,7 @@ bool MCExpr::EvaluateAsRelocatable(MCValue &Res,
   case Binary: {
     const MCBinaryExpr *ABE = cast<MCBinaryExpr>(this);
     MCValue LHSValue, RHSValue;
-    
+
     if (!ABE->getLHS()->EvaluateAsRelocatable(LHSValue, Layout) ||
         !ABE->getRHS()->EvaluateAsRelocatable(RHSValue, Layout))
       return false;
diff --git a/lib/MC/MCMachOStreamer.cpp b/lib/MC/MCMachOStreamer.cpp
index 73b1074..9504392 100644
--- a/lib/MC/MCMachOStreamer.cpp
+++ b/lib/MC/MCMachOStreamer.cpp
@@ -45,7 +45,6 @@ class MCMachOStreamer : public MCStreamer {
 
 private:
   MCAssembler Assembler;
-  MCCodeEmitter *Emitter;
   MCSectionData *CurSectionData;
 
 private:
@@ -61,7 +60,7 @@ private:
 public:
   MCMachOStreamer(MCContext &Context, TargetAsmBackend &TAB,
                   raw_ostream &_OS, MCCodeEmitter *_Emitter)
-    : MCStreamer(Context), Assembler(Context, TAB, _OS), Emitter(_Emitter),
+    : MCStreamer(Context), Assembler(Context, TAB, *_Emitter, _OS),
       CurSectionData(0) {}
   ~MCMachOStreamer() {}
 
@@ -370,15 +369,12 @@ void MCMachOStreamer::EmitInstruction(const MCInst &Inst) {
     if (Inst.getOperand(i).isExpr())
       AddValueSymbols(Inst.getOperand(i).getExpr());
 
-  if (!Emitter)
-    llvm_unreachable("no code emitter available!");
-
   CurSectionData->setHasInstructions(true);
 
   SmallVector<MCFixup, 4> Fixups;
   SmallString<256> Code;
   raw_svector_ostream VecOS(Code);
-  Emitter->EncodeInstruction(Inst, VecOS, Fixups);
+  Assembler.getEmitter().EncodeInstruction(Inst, VecOS, Fixups);
   VecOS.flush();
 
   // Add the fixups and data.
diff --git a/lib/MC/MCObjectWriter.cpp b/lib/MC/MCObjectWriter.cpp
new file mode 100644
index 0000000..d117e82
--- /dev/null
+++ b/lib/MC/MCObjectWriter.cpp
@@ -0,0 +1,15 @@
+//===- lib/MC/MCObjectWriter.cpp - MCObjectWriter implementation ----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCObjectWriter.h"
+
+using namespace llvm;
+
+MCObjectWriter::~MCObjectWriter() {
+}
diff --git a/lib/MC/MCValue.cpp b/lib/MC/MCValue.cpp
index 043a49d..c6ea16c 100644
--- a/lib/MC/MCValue.cpp
+++ b/lib/MC/MCValue.cpp
@@ -8,6 +8,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/MC/MCValue.h"
+#include "llvm/MC/MCExpr.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 
@@ -19,10 +20,12 @@ void MCValue::print(raw_ostream &OS, const MCAsmInfo *MAI) const {
     return;
   }
 
-  OS << *getSymA();
+  getSymA()->print(OS);
 
-  if (getSymB())
-    OS << " - " << *getSymB();
+  if (getSymB()) {
+    OS << " - ";
+    getSymB()->print(OS);
+  }
 
   if (getConstant())
     OS << " + " << getConstant();
diff --git a/lib/MC/MachObjectWriter.cpp b/lib/MC/MachObjectWriter.cpp
new file mode 100644
index 0000000..4b08c22
--- /dev/null
+++ b/lib/MC/MachObjectWriter.cpp
@@ -0,0 +1,1109 @@
+//===- lib/MC/MachObjectWriter.cpp - Mach-O File Writer -------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MachObjectWriter.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MachO.h"
+#include "llvm/Target/TargetAsmBackend.h"
+
+// FIXME: Gross.
+#include "../Target/X86/X86FixupKinds.h"
+
+#include <vector>
+using namespace llvm;
+
+static unsigned getFixupKindLog2Size(unsigned Kind) {
+  switch (Kind) {
+  default: llvm_unreachable("invalid fixup kind!");
+  case X86::reloc_pcrel_1byte:
+  case FK_Data_1: return 0;
+  case FK_Data_2: return 1;
+  case X86::reloc_pcrel_4byte:
+  case X86::reloc_riprel_4byte:
+  case X86::reloc_riprel_4byte_movq_load:
+  case FK_Data_4: return 2;
+  case FK_Data_8: return 3;
+  }
+}
+
+static bool isFixupKindPCRel(unsigned Kind) {
+  switch (Kind) {
+  default:
+    return false;
+  case X86::reloc_pcrel_1byte:
+  case X86::reloc_pcrel_4byte:
+  case X86::reloc_riprel_4byte:
+  case X86::reloc_riprel_4byte_movq_load:
+    return true;
+  }
+}
+
+static bool isFixupKindRIPRel(unsigned Kind) {
+  return Kind == X86::reloc_riprel_4byte ||
+    Kind == X86::reloc_riprel_4byte_movq_load;
+}
+
+namespace {
+
+class MachObjectWriterImpl {
+  // See <mach-o/loader.h>.
+  enum {
+    Header_Magic32 = 0xFEEDFACE,
+    Header_Magic64 = 0xFEEDFACF
+  };
+
+  enum {
+    Header32Size = 28,
+    Header64Size = 32,
+    SegmentLoadCommand32Size = 56,
+    SegmentLoadCommand64Size = 72,
+    Section32Size = 68,
+    Section64Size = 80,
+    SymtabLoadCommandSize = 24,
+    DysymtabLoadCommandSize = 80,
+    Nlist32Size = 12,
+    Nlist64Size = 16,
+    RelocationInfoSize = 8
+  };
+
+  enum HeaderFileType {
+    HFT_Object = 0x1
+  };
+
+  enum HeaderFlags {
+    HF_SubsectionsViaSymbols = 0x2000
+  };
+
+  enum LoadCommandType {
+    LCT_Segment = 0x1,
+    LCT_Symtab = 0x2,
+    LCT_Dysymtab = 0xb,
+    LCT_Segment64 = 0x19
+  };
+
+  // See <mach-o/nlist.h>.
+  enum SymbolTypeType {
+    STT_Undefined = 0x00,
+    STT_Absolute  = 0x02,
+    STT_Section   = 0x0e
+  };
+
+  enum SymbolTypeFlags {
+    // If any of these bits are set, then the entry is a stab entry number (see
+    // <mach-o/stab.h>. Otherwise the other masks apply.
+    STF_StabsEntryMask = 0xe0,
+
+    STF_TypeMask       = 0x0e,
+    STF_External       = 0x01,
+    STF_PrivateExtern  = 0x10
+  };
+
+  /// IndirectSymbolFlags - Flags for encoding special values in the indirect
+  /// symbol entry.
+  enum IndirectSymbolFlags {
+    ISF_Local    = 0x80000000,
+    ISF_Absolute = 0x40000000
+  };
+
+  /// RelocationFlags - Special flags for addresses.
+  enum RelocationFlags {
+    RF_Scattered = 0x80000000
+  };
+
+  enum RelocationInfoType {
+    RIT_Vanilla             = 0,
+    RIT_Pair                = 1,
+    RIT_Difference          = 2,
+    RIT_PreboundLazyPointer = 3,
+    RIT_LocalDifference     = 4
+  };
+
+  /// X86_64 uses its own relocation types.
+  enum RelocationInfoTypeX86_64 {
+    RIT_X86_64_Unsigned   = 0,
+    RIT_X86_64_Signed     = 1,
+    RIT_X86_64_Branch     = 2,
+    RIT_X86_64_GOTLoad    = 3,
+    RIT_X86_64_GOT        = 4,
+    RIT_X86_64_Subtractor = 5,
+    RIT_X86_64_Signed1    = 6,
+    RIT_X86_64_Signed2    = 7,
+    RIT_X86_64_Signed4    = 8
+  };
+
+  /// MachSymbolData - Helper struct for containing some precomputed information
+  /// on symbols.
+  struct MachSymbolData {
+    MCSymbolData *SymbolData;
+    uint64_t StringIndex;
+    uint8_t SectionIndex;
+
+    // Support lexicographic sorting.
+    bool operator<(const MachSymbolData &RHS) const {
+      const std::string &Name = SymbolData->getSymbol().getName();
+      return Name < RHS.SymbolData->getSymbol().getName();
+    }
+  };
+
+  /// @name Relocation Data
+  /// @{
+
+  struct MachRelocationEntry {
+    uint32_t Word0;
+    uint32_t Word1;
+  };
+
+  llvm::DenseMap<const MCSectionData*,
+                 std::vector<MachRelocationEntry> > Relocations;
+
+  /// @}
+  /// @name Symbol Table Data
+  /// @{
+
+  SmallString<256> StringTable;
+  std::vector<MachSymbolData> LocalSymbolData;
+  std::vector<MachSymbolData> ExternalSymbolData;
+  std::vector<MachSymbolData> UndefinedSymbolData;
+
+  /// @}
+
+  MachObjectWriter *Writer;
+
+  raw_ostream &OS;
+
+  unsigned Is64Bit : 1;
+
+public:
+  MachObjectWriterImpl(MachObjectWriter *_Writer, bool _Is64Bit)
+    : Writer(_Writer), OS(Writer->getStream()), Is64Bit(_Is64Bit) {
+  }
+
+  void Write8(uint8_t Value) { Writer->Write8(Value); }
+  void Write16(uint16_t Value) { Writer->Write16(Value); }
+  void Write32(uint32_t Value) { Writer->Write32(Value); }
+  void Write64(uint64_t Value) { Writer->Write64(Value); }
+  void WriteZeros(unsigned N) { Writer->WriteZeros(N); }
+  void WriteBytes(StringRef Str, unsigned ZeroFillSize = 0) {
+    Writer->WriteBytes(Str, ZeroFillSize);
+  }
+
+  void WriteHeader(unsigned NumLoadCommands, unsigned LoadCommandsSize,
+                   bool SubsectionsViaSymbols) {
+    uint32_t Flags = 0;
+
+    if (SubsectionsViaSymbols)
+      Flags |= HF_SubsectionsViaSymbols;
+
+    // struct mach_header (28 bytes) or
+    // struct mach_header_64 (32 bytes)
+
+    uint64_t Start = OS.tell();
+    (void) Start;
+
+    Write32(Is64Bit ? Header_Magic64 : Header_Magic32);
+
+    // FIXME: Support cputype.
+    Write32(Is64Bit ? MachO::CPUTypeX86_64 : MachO::CPUTypeI386);
+    // FIXME: Support cpusubtype.
+    Write32(MachO::CPUSubType_I386_ALL);
+    Write32(HFT_Object);
+    Write32(NumLoadCommands);    // Object files have a single load command, the
+                                 // segment.
+    Write32(LoadCommandsSize);
+    Write32(Flags);
+    if (Is64Bit)
+      Write32(0); // reserved
+
+    assert(OS.tell() - Start == Is64Bit ? Header64Size : Header32Size);
+  }
+
+  /// WriteSegmentLoadCommand - Write a segment load command.
+  ///
+  /// \arg NumSections - The number of sections in this segment.
+  /// \arg SectionDataSize - The total size of the sections.
+  void WriteSegmentLoadCommand(unsigned NumSections,
+                               uint64_t VMSize,
+                               uint64_t SectionDataStartOffset,
+                               uint64_t SectionDataSize) {
+    // struct segment_command (56 bytes) or
+    // struct segment_command_64 (72 bytes)
+
+    uint64_t Start = OS.tell();
+    (void) Start;
+
+    unsigned SegmentLoadCommandSize = Is64Bit ? SegmentLoadCommand64Size :
+      SegmentLoadCommand32Size;
+    Write32(Is64Bit ? LCT_Segment64 : LCT_Segment);
+    Write32(SegmentLoadCommandSize +
+            NumSections * (Is64Bit ? Section64Size : Section32Size));
+
+    WriteBytes("", 16);
+    if (Is64Bit) {
+      Write64(0); // vmaddr
+      Write64(VMSize); // vmsize
+      Write64(SectionDataStartOffset); // file offset
+      Write64(SectionDataSize); // file size
+    } else {
+      Write32(0); // vmaddr
+      Write32(VMSize); // vmsize
+      Write32(SectionDataStartOffset); // file offset
+      Write32(SectionDataSize); // file size
+    }
+    Write32(0x7); // maxprot
+    Write32(0x7); // initprot
+    Write32(NumSections);
+    Write32(0); // flags
+
+    assert(OS.tell() - Start == SegmentLoadCommandSize);
+  }
+
+  void WriteSection(const MCAssembler &Asm, const MCSectionData &SD,
+                    uint64_t FileOffset, uint64_t RelocationsStart,
+                    unsigned NumRelocations) {
+    // The offset is unused for virtual sections.
+    if (Asm.getBackend().isVirtualSection(SD.getSection())) {
+      assert(SD.getFileSize() == 0 && "Invalid file size!");
+      FileOffset = 0;
+    }
+
+    // struct section (68 bytes) or
+    // struct section_64 (80 bytes)
+
+    uint64_t Start = OS.tell();
+    (void) Start;
+
+    // FIXME: cast<> support!
+    const MCSectionMachO &Section =
+      static_cast<const MCSectionMachO&>(SD.getSection());
+    WriteBytes(Section.getSectionName(), 16);
+    WriteBytes(Section.getSegmentName(), 16);
+    if (Is64Bit) {
+      Write64(SD.getAddress()); // address
+      Write64(SD.getSize()); // size
+    } else {
+      Write32(SD.getAddress()); // address
+      Write32(SD.getSize()); // size
+    }
+    Write32(FileOffset);
+
+    unsigned Flags = Section.getTypeAndAttributes();
+    if (SD.hasInstructions())
+      Flags |= MCSectionMachO::S_ATTR_SOME_INSTRUCTIONS;
+
+    assert(isPowerOf2_32(SD.getAlignment()) && "Invalid alignment!");
+    Write32(Log2_32(SD.getAlignment()));
+    Write32(NumRelocations ? RelocationsStart : 0);
+    Write32(NumRelocations);
+    Write32(Flags);
+    Write32(0); // reserved1
+    Write32(Section.getStubSize()); // reserved2
+    if (Is64Bit)
+      Write32(0); // reserved3
+
+    assert(OS.tell() - Start == Is64Bit ? Section64Size : Section32Size);
+  }
+
+  void WriteSymtabLoadCommand(uint32_t SymbolOffset, uint32_t NumSymbols,
+                              uint32_t StringTableOffset,
+                              uint32_t StringTableSize) {
+    // struct symtab_command (24 bytes)
+
+    uint64_t Start = OS.tell();
+    (void) Start;
+
+    Write32(LCT_Symtab);
+    Write32(SymtabLoadCommandSize);
+    Write32(SymbolOffset);
+    Write32(NumSymbols);
+    Write32(StringTableOffset);
+    Write32(StringTableSize);
+
+    assert(OS.tell() - Start == SymtabLoadCommandSize);
+  }
+
+  void WriteDysymtabLoadCommand(uint32_t FirstLocalSymbol,
+                                uint32_t NumLocalSymbols,
+                                uint32_t FirstExternalSymbol,
+                                uint32_t NumExternalSymbols,
+                                uint32_t FirstUndefinedSymbol,
+                                uint32_t NumUndefinedSymbols,
+                                uint32_t IndirectSymbolOffset,
+                                uint32_t NumIndirectSymbols) {
+    // struct dysymtab_command (80 bytes)
+
+    uint64_t Start = OS.tell();
+    (void) Start;
+
+    Write32(LCT_Dysymtab);
+    Write32(DysymtabLoadCommandSize);
+    Write32(FirstLocalSymbol);
+    Write32(NumLocalSymbols);
+    Write32(FirstExternalSymbol);
+    Write32(NumExternalSymbols);
+    Write32(FirstUndefinedSymbol);
+    Write32(NumUndefinedSymbols);
+    Write32(0); // tocoff
+    Write32(0); // ntoc
+    Write32(0); // modtaboff
+    Write32(0); // nmodtab
+    Write32(0); // extrefsymoff
+    Write32(0); // nextrefsyms
+    Write32(IndirectSymbolOffset);
+    Write32(NumIndirectSymbols);
+    Write32(0); // extreloff
+    Write32(0); // nextrel
+    Write32(0); // locreloff
+    Write32(0); // nlocrel
+
+    assert(OS.tell() - Start == DysymtabLoadCommandSize);
+  }
+
+  void WriteNlist(MachSymbolData &MSD) {
+    MCSymbolData &Data = *MSD.SymbolData;
+    const MCSymbol &Symbol = Data.getSymbol();
+    uint8_t Type = 0;
+    uint16_t Flags = Data.getFlags();
+    uint32_t Address = 0;
+
+    // Set the N_TYPE bits. See <mach-o/nlist.h>.
+    //
+    // FIXME: Are the prebound or indirect fields possible here?
+    if (Symbol.isUndefined())
+      Type = STT_Undefined;
+    else if (Symbol.isAbsolute())
+      Type = STT_Absolute;
+    else
+      Type = STT_Section;
+
+    // FIXME: Set STAB bits.
+
+    if (Data.isPrivateExtern())
+      Type |= STF_PrivateExtern;
+
+    // Set external bit.
+    if (Data.isExternal() || Symbol.isUndefined())
+      Type |= STF_External;
+
+    // Compute the symbol address.
+    if (Symbol.isDefined()) {
+      if (Symbol.isAbsolute()) {
+        llvm_unreachable("FIXME: Not yet implemented!");
+      } else {
+        Address = Data.getAddress();
+      }
+    } else if (Data.isCommon()) {
+      // Common symbols are encoded with the size in the address
+      // field, and their alignment in the flags.
+      Address = Data.getCommonSize();
+
+      // Common alignment is packed into the 'desc' bits.
+      if (unsigned Align = Data.getCommonAlignment()) {
+        unsigned Log2Size = Log2_32(Align);
+        assert((1U << Log2Size) == Align && "Invalid 'common' alignment!");
+        if (Log2Size > 15)
+          llvm_report_error("invalid 'common' alignment '" +
+                            Twine(Align) + "'");
+        // FIXME: Keep this mask with the SymbolFlags enumeration.
+        Flags = (Flags & 0xF0FF) | (Log2Size << 8);
+      }
+    }
+
+    // struct nlist (12 bytes)
+
+    Write32(MSD.StringIndex);
+    Write8(Type);
+    Write8(MSD.SectionIndex);
+
+    // The Mach-O streamer uses the lowest 16-bits of the flags for the 'desc'
+    // value.
+    Write16(Flags);
+    if (Is64Bit)
+      Write64(Address);
+    else
+      Write32(Address);
+  }
+
+  void RecordX86_64Relocation(const MCAssembler &Asm,
+                              const MCDataFragment &Fragment,
+                              const MCAsmFixup &Fixup, MCValue Target,
+                              uint64_t &FixedValue) {
+    unsigned IsPCRel = isFixupKindPCRel(Fixup.Kind);
+    unsigned IsRIPRel = isFixupKindRIPRel(Fixup.Kind);
+    unsigned Log2Size = getFixupKindLog2Size(Fixup.Kind);
+
+    // See <reloc.h>.
+    uint32_t Address = Fragment.getOffset() + Fixup.Offset;
+    int64_t Value = 0;
+    unsigned Index = 0;
+    unsigned IsExtern = 0;
+    unsigned Type = 0;
+
+    Value = Target.getConstant();
+
+    if (IsPCRel) {
+      // Compensate for the relocation offset, Darwin x86_64 relocations only
+      // have the addend and appear to have attempted to define it to be the
+      // actual expression addend without the PCrel bias. However, instructions
+      // with data following the relocation are not accomodated for (see comment
+      // below regarding SIGNED{1,2,4}), so it isn't exactly that either.
+      Value += 1 << Log2Size;
+    }
+
+    if (Target.isAbsolute()) { // constant
+      // SymbolNum of 0 indicates the absolute section.
+      Type = RIT_X86_64_Unsigned;
+      Index = 0;
+
+      // FIXME: I believe this is broken, I don't think the linker can
+      // understand it. I think it would require a local relocation, but I'm not
+      // sure if that would work either. The official way to get an absolute
+      // PCrel relocation is to use an absolute symbol (which we don't support
+      // yet).
+      if (IsPCRel) {
+        IsExtern = 1;
+        Type = RIT_X86_64_Branch;
+      }
+    } else if (Target.getSymB()) { // A - B + constant
+      const MCSymbol *A = &Target.getSymA()->getSymbol();
+      MCSymbolData &A_SD = Asm.getSymbolData(*A);
+      const MCSymbolData *A_Base = Asm.getAtom(&A_SD);
+
+      const MCSymbol *B = &Target.getSymB()->getSymbol();
+      MCSymbolData &B_SD = Asm.getSymbolData(*B);
+      const MCSymbolData *B_Base = Asm.getAtom(&B_SD);
+
+      // Neither symbol can be modified.
+      if (Target.getSymA()->getKind() != MCSymbolRefExpr::VK_None ||
+          Target.getSymB()->getKind() != MCSymbolRefExpr::VK_None)
+        llvm_report_error("unsupported relocation of modified symbol");
+
+      // We don't support PCrel relocations of differences. Darwin 'as' doesn't
+      // implement most of these correctly.
+      if (IsPCRel)
+        llvm_report_error("unsupported pc-relative relocation of difference");
+
+      // We don't currently support any situation where one or both of the
+      // symbols would require a local relocation. This is almost certainly
+      // unused and may not be possible to encode correctly.
+      if (!A_Base || !B_Base)
+        llvm_report_error("unsupported local relocations in difference");
+
+      // Darwin 'as' doesn't emit correct relocations for this (it ends up with
+      // a single SIGNED relocation); reject it for now.
+      if (A_Base == B_Base)
+        llvm_report_error("unsupported relocation with identical base");
+
+      Value += A_SD.getAddress() - A_Base->getAddress();
+      Value -= B_SD.getAddress() - B_Base->getAddress();
+
+      Index = A_Base->getIndex();
+      IsExtern = 1;
+      Type = RIT_X86_64_Unsigned;
+
+      MachRelocationEntry MRE;
+      MRE.Word0 = Address;
+      MRE.Word1 = ((Index     <<  0) |
+                   (IsPCRel   << 24) |
+                   (Log2Size  << 25) |
+                   (IsExtern  << 27) |
+                   (Type      << 28));
+      Relocations[Fragment.getParent()].push_back(MRE);
+
+      Index = B_Base->getIndex();
+      IsExtern = 1;
+      Type = RIT_X86_64_Subtractor;
+    } else {
+      const MCSymbol *Symbol = &Target.getSymA()->getSymbol();
+      MCSymbolData &SD = Asm.getSymbolData(*Symbol);
+      const MCSymbolData *Base = Asm.getAtom(&SD);
+
+      // x86_64 almost always uses external relocations, except when there is no
+      // symbol to use as a base address (a local symbol with no preceeding
+      // non-local symbol).
+      if (Base) {
+        Index = Base->getIndex();
+        IsExtern = 1;
+
+        // Add the local offset, if needed.
+        if (Base != &SD)
+          Value += SD.getAddress() - Base->getAddress();
+      } else {
+        // The index is the section ordinal.
+        //
+        // FIXME: O(N)
+        Index = 1;
+        MCAssembler::const_iterator it = Asm.begin(), ie = Asm.end();
+        for (; it != ie; ++it, ++Index)
+          if (&*it == SD.getFragment()->getParent())
+            break;
+        assert(it != ie && "Unable to find section index!");
+        IsExtern = 0;
+        Value += SD.getAddress();
+
+        if (IsPCRel)
+          Value -= Address + (1 << Log2Size);
+      }
+
+      MCSymbolRefExpr::VariantKind Modifier = Target.getSymA()->getKind();
+      if (IsPCRel) {
+        if (IsRIPRel) {
+          if (Modifier == MCSymbolRefExpr::VK_GOTPCREL) {
+            // x86_64 distinguishes movq foo@GOTPCREL so that the linker can
+            // rewrite the movq to an leaq at link time if the symbol ends up in
+            // the same linkage unit.
+            if (unsigned(Fixup.Kind) == X86::reloc_riprel_4byte_movq_load)
+              Type = RIT_X86_64_GOTLoad;
+            else
+              Type = RIT_X86_64_GOT;
+          } else if (Modifier != MCSymbolRefExpr::VK_None)
+            llvm_report_error("unsupported symbol modifier in relocation");
+          else
+            Type = RIT_X86_64_Signed;
+        } else {
+          if (Modifier != MCSymbolRefExpr::VK_None)
+            llvm_report_error("unsupported symbol modifier in branch "
+                              "relocation");
+
+          Type = RIT_X86_64_Branch;
+        }
+
+        // The Darwin x86_64 relocation format has a problem where it cannot
+        // encode an address (L<foo> + <constant>) which is outside the atom
+        // containing L<foo>. Generally, this shouldn't occur but it does happen
+        // when we have a RIPrel instruction with data following the relocation
+        // entry (e.g., movb $012, L0(%rip)). Even with the PCrel adjustment
+        // Darwin x86_64 uses, the offset is still negative and the linker has
+        // no way to recognize this.
+        //
+        // To work around this, Darwin uses several special relocation types to
+        // indicate the offsets. However, the specification or implementation of
+        // these seems to also be incomplete; they should adjust the addend as
+        // well based on the actual encoded instruction (the additional bias),
+        // but instead appear to just look at the final offset.
+        if (IsRIPRel) {
+          switch (-(Target.getConstant() + (1 << Log2Size))) {
+          case 1: Type = RIT_X86_64_Signed1; break;
+          case 2: Type = RIT_X86_64_Signed2; break;
+          case 4: Type = RIT_X86_64_Signed4; break;
+          }
+        }
+      } else {
+        if (Modifier == MCSymbolRefExpr::VK_GOT)
+          Type = RIT_X86_64_GOT;
+        else if (Modifier != MCSymbolRefExpr::VK_None)
+          llvm_report_error("unsupported symbol modifier in relocation");
+        else
+          Type = RIT_X86_64_Unsigned;
+      }
+    }
+
+    // x86_64 always writes custom values into the fixups.
+    FixedValue = Value;
+
+    // struct relocation_info (8 bytes)
+    MachRelocationEntry MRE;
+    MRE.Word0 = Address;
+    MRE.Word1 = ((Index     <<  0) |
+                 (IsPCRel   << 24) |
+                 (Log2Size  << 25) |
+                 (IsExtern  << 27) |
+                 (Type      << 28));
+    Relocations[Fragment.getParent()].push_back(MRE);
+  }
+
+  void RecordScatteredRelocation(const MCAssembler &Asm,
+                                 const MCFragment &Fragment,
+                                 const MCAsmFixup &Fixup, MCValue Target,
+                                 uint64_t &FixedValue) {
+    uint32_t Address = Fragment.getOffset() + Fixup.Offset;
+    unsigned IsPCRel = isFixupKindPCRel(Fixup.Kind);
+    unsigned Log2Size = getFixupKindLog2Size(Fixup.Kind);
+    unsigned Type = RIT_Vanilla;
+
+    // See <reloc.h>.
+    const MCSymbol *A = &Target.getSymA()->getSymbol();
+    MCSymbolData *A_SD = &Asm.getSymbolData(*A);
+
+    if (!A_SD->getFragment())
+      llvm_report_error("symbol '" + A->getName() +
+                        "' can not be undefined in a subtraction expression");
+
+    uint32_t Value = A_SD->getAddress();
+    uint32_t Value2 = 0;
+
+    if (const MCSymbolRefExpr *B = Target.getSymB()) {
+      MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol());
+
+      if (!B_SD->getFragment())
+        llvm_report_error("symbol '" + B->getSymbol().getName() +
+                          "' can not be undefined in a subtraction expression");
+
+      // Select the appropriate difference relocation type.
+      //
+      // Note that there is no longer any semantic difference between these two
+      // relocation types from the linkers point of view, this is done solely
+      // for pedantic compatibility with 'as'.
+      Type = A_SD->isExternal() ? RIT_Difference : RIT_LocalDifference;
+      Value2 = B_SD->getAddress();
+    }
+
+    // Relocations are written out in reverse order, so the PAIR comes first.
+    if (Type == RIT_Difference || Type == RIT_LocalDifference) {
+      MachRelocationEntry MRE;
+      MRE.Word0 = ((0         <<  0) |
+                   (RIT_Pair  << 24) |
+                   (Log2Size  << 28) |
+                   (IsPCRel   << 30) |
+                   RF_Scattered);
+      MRE.Word1 = Value2;
+      Relocations[Fragment.getParent()].push_back(MRE);
+    }
+
+    MachRelocationEntry MRE;
+    MRE.Word0 = ((Address   <<  0) |
+                 (Type      << 24) |
+                 (Log2Size  << 28) |
+                 (IsPCRel   << 30) |
+                 RF_Scattered);
+    MRE.Word1 = Value;
+    Relocations[Fragment.getParent()].push_back(MRE);
+  }
+
+  void RecordRelocation(const MCAssembler &Asm, const MCDataFragment &Fragment,
+                        const MCAsmFixup &Fixup, MCValue Target,
+                        uint64_t &FixedValue) {
+    if (Is64Bit) {
+      RecordX86_64Relocation(Asm, Fragment, Fixup, Target, FixedValue);
+      return;
+    }
+
+    unsigned IsPCRel = isFixupKindPCRel(Fixup.Kind);
+    unsigned Log2Size = getFixupKindLog2Size(Fixup.Kind);
+
+    // If this is a difference or a defined symbol plus an offset, then we need
+    // a scattered relocation entry.
+    uint32_t Offset = Target.getConstant();
+    if (IsPCRel)
+      Offset += 1 << Log2Size;
+    if (Target.getSymB() ||
+        (Target.getSymA() && !Target.getSymA()->getSymbol().isUndefined() &&
+         Offset)) {
+      RecordScatteredRelocation(Asm, Fragment, Fixup, Target, FixedValue);
+      return;
+    }
+
+    // See <reloc.h>.
+    uint32_t Address = Fragment.getOffset() + Fixup.Offset;
+    uint32_t Value = 0;
+    unsigned Index = 0;
+    unsigned IsExtern = 0;
+    unsigned Type = 0;
+
+    if (Target.isAbsolute()) { // constant
+      // SymbolNum of 0 indicates the absolute section.
+      //
+      // FIXME: Currently, these are never generated (see code below). I cannot
+      // find a case where they are actually emitted.
+      Type = RIT_Vanilla;
+      Value = 0;
+    } else {
+      const MCSymbol *Symbol = &Target.getSymA()->getSymbol();
+      MCSymbolData *SD = &Asm.getSymbolData(*Symbol);
+
+      if (Symbol->isUndefined()) {
+        IsExtern = 1;
+        Index = SD->getIndex();
+        Value = 0;
+      } else {
+        // The index is the section ordinal.
+        //
+        // FIXME: O(N)
+        Index = 1;
+        MCAssembler::const_iterator it = Asm.begin(), ie = Asm.end();
+        for (; it != ie; ++it, ++Index)
+          if (&*it == SD->getFragment()->getParent())
+            break;
+        assert(it != ie && "Unable to find section index!");
+        Value = SD->getAddress();
+      }
+
+      Type = RIT_Vanilla;
+    }
+
+    // struct relocation_info (8 bytes)
+    MachRelocationEntry MRE;
+    MRE.Word0 = Address;
+    MRE.Word1 = ((Index     <<  0) |
+                 (IsPCRel   << 24) |
+                 (Log2Size  << 25) |
+                 (IsExtern  << 27) |
+                 (Type      << 28));
+    Relocations[Fragment.getParent()].push_back(MRE);
+  }
+
+  void BindIndirectSymbols(MCAssembler &Asm) {
+    // This is the point where 'as' creates actual symbols for indirect symbols
+    // (in the following two passes). It would be easier for us to do this
+    // sooner when we see the attribute, but that makes getting the order in the
+    // symbol table much more complicated than it is worth.
+    //
+    // FIXME: Revisit this when the dust settles.
+
+    // Bind non lazy symbol pointers first.
+    for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(),
+           ie = Asm.indirect_symbol_end(); it != ie; ++it) {
+      // FIXME: cast<> support!
+      const MCSectionMachO &Section =
+        static_cast<const MCSectionMachO&>(it->SectionData->getSection());
+
+      if (Section.getType() != MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS)
+        continue;
+
+      Asm.getOrCreateSymbolData(*it->Symbol);
+    }
+
+    // Then lazy symbol pointers and symbol stubs.
+    for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(),
+           ie = Asm.indirect_symbol_end(); it != ie; ++it) {
+      // FIXME: cast<> support!
+      const MCSectionMachO &Section =
+        static_cast<const MCSectionMachO&>(it->SectionData->getSection());
+
+      if (Section.getType() != MCSectionMachO::S_LAZY_SYMBOL_POINTERS &&
+          Section.getType() != MCSectionMachO::S_SYMBOL_STUBS)
+        continue;
+
+      // Set the symbol type to undefined lazy, but only on construction.
+      //
+      // FIXME: Do not hardcode.
+      bool Created;
+      MCSymbolData &Entry = Asm.getOrCreateSymbolData(*it->Symbol, &Created);
+      if (Created)
+        Entry.setFlags(Entry.getFlags() | 0x0001);
+    }
+  }
+
+  /// ComputeSymbolTable - Compute the symbol table data
+  ///
+  /// \param StringTable [out] - The string table data.
+  /// \param StringIndexMap [out] - Map from symbol names to offsets in the
+  /// string table.
+  void ComputeSymbolTable(MCAssembler &Asm, SmallString<256> &StringTable,
+                          std::vector<MachSymbolData> &LocalSymbolData,
+                          std::vector<MachSymbolData> &ExternalSymbolData,
+                          std::vector<MachSymbolData> &UndefinedSymbolData) {
+    // Build section lookup table.
+    DenseMap<const MCSection*, uint8_t> SectionIndexMap;
+    unsigned Index = 1;
+    for (MCAssembler::iterator it = Asm.begin(),
+           ie = Asm.end(); it != ie; ++it, ++Index)
+      SectionIndexMap[&it->getSection()] = Index;
+    assert(Index <= 256 && "Too many sections!");
+
+    // Index 0 is always the empty string.
+    StringMap<uint64_t> StringIndexMap;
+    StringTable += '\x00';
+
+    // Build the symbol arrays and the string table, but only for non-local
+    // symbols.
+    //
+    // The particular order that we collect the symbols and create the string
+    // table, then sort the symbols is chosen to match 'as'. Even though it
+    // doesn't matter for correctness, this is important for letting us diff .o
+    // files.
+    for (MCAssembler::symbol_iterator it = Asm.symbol_begin(),
+           ie = Asm.symbol_end(); it != ie; ++it) {
+      const MCSymbol &Symbol = it->getSymbol();
+
+      // Ignore non-linker visible symbols.
+      if (!Asm.isSymbolLinkerVisible(it))
+        continue;
+
+      if (!it->isExternal() && !Symbol.isUndefined())
+        continue;
+
+      uint64_t &Entry = StringIndexMap[Symbol.getName()];
+      if (!Entry) {
+        Entry = StringTable.size();
+        StringTable += Symbol.getName();
+        StringTable += '\x00';
+      }
+
+      MachSymbolData MSD;
+      MSD.SymbolData = it;
+      MSD.StringIndex = Entry;
+
+      if (Symbol.isUndefined()) {
+        MSD.SectionIndex = 0;
+        UndefinedSymbolData.push_back(MSD);
+      } else if (Symbol.isAbsolute()) {
+        MSD.SectionIndex = 0;
+        ExternalSymbolData.push_back(MSD);
+      } else {
+        MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection());
+        assert(MSD.SectionIndex && "Invalid section index!");
+        ExternalSymbolData.push_back(MSD);
+      }
+    }
+
+    // Now add the data for local symbols.
+    for (MCAssembler::symbol_iterator it = Asm.symbol_begin(),
+           ie = Asm.symbol_end(); it != ie; ++it) {
+      const MCSymbol &Symbol = it->getSymbol();
+
+      // Ignore non-linker visible symbols.
+      if (!Asm.isSymbolLinkerVisible(it))
+        continue;
+
+      if (it->isExternal() || Symbol.isUndefined())
+        continue;
+
+      uint64_t &Entry = StringIndexMap[Symbol.getName()];
+      if (!Entry) {
+        Entry = StringTable.size();
+        StringTable += Symbol.getName();
+        StringTable += '\x00';
+      }
+
+      MachSymbolData MSD;
+      MSD.SymbolData = it;
+      MSD.StringIndex = Entry;
+
+      if (Symbol.isAbsolute()) {
+        MSD.SectionIndex = 0;
+        LocalSymbolData.push_back(MSD);
+      } else {
+        MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection());
+        assert(MSD.SectionIndex && "Invalid section index!");
+        LocalSymbolData.push_back(MSD);
+      }
+    }
+
+    // External and undefined symbols are required to be in lexicographic order.
+    std::sort(ExternalSymbolData.begin(), ExternalSymbolData.end());
+    std::sort(UndefinedSymbolData.begin(), UndefinedSymbolData.end());
+
+    // Set the symbol indices.
+    Index = 0;
+    for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i)
+      LocalSymbolData[i].SymbolData->setIndex(Index++);
+    for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i)
+      ExternalSymbolData[i].SymbolData->setIndex(Index++);
+    for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i)
+      UndefinedSymbolData[i].SymbolData->setIndex(Index++);
+
+    // The string table is padded to a multiple of 4.
+    while (StringTable.size() % 4)
+      StringTable += '\x00';
+  }
+
+  void ExecutePostLayoutBinding(MCAssembler &Asm) {
+    // Create symbol data for any indirect symbols.
+    BindIndirectSymbols(Asm);
+
+    // Compute symbol table information and bind symbol indices.
+    ComputeSymbolTable(Asm, StringTable, LocalSymbolData, ExternalSymbolData,
+                       UndefinedSymbolData);
+  }
+
+  void WriteObject(const MCAssembler &Asm) {
+    unsigned NumSections = Asm.size();
+
+    // The section data starts after the header, the segment load command (and
+    // section headers) and the symbol table.
+    unsigned NumLoadCommands = 1;
+    uint64_t LoadCommandsSize = Is64Bit ?
+      SegmentLoadCommand64Size + NumSections * Section64Size :
+      SegmentLoadCommand32Size + NumSections * Section32Size;
+
+    // Add the symbol table load command sizes, if used.
+    unsigned NumSymbols = LocalSymbolData.size() + ExternalSymbolData.size() +
+      UndefinedSymbolData.size();
+    if (NumSymbols) {
+      NumLoadCommands += 2;
+      LoadCommandsSize += SymtabLoadCommandSize + DysymtabLoadCommandSize;
+    }
+
+    // Compute the total size of the section data, as well as its file size and
+    // vm size.
+    uint64_t SectionDataStart = (Is64Bit ? Header64Size : Header32Size)
+      + LoadCommandsSize;
+    uint64_t SectionDataSize = 0;
+    uint64_t SectionDataFileSize = 0;
+    uint64_t VMSize = 0;
+    for (MCAssembler::const_iterator it = Asm.begin(),
+           ie = Asm.end(); it != ie; ++it) {
+      const MCSectionData &SD = *it;
+
+      VMSize = std::max(VMSize, SD.getAddress() + SD.getSize());
+
+      if (Asm.getBackend().isVirtualSection(SD.getSection()))
+        continue;
+
+      SectionDataSize = std::max(SectionDataSize,
+                                 SD.getAddress() + SD.getSize());
+      SectionDataFileSize = std::max(SectionDataFileSize,
+                                     SD.getAddress() + SD.getFileSize());
+    }
+
+    // The section data is padded to 4 bytes.
+    //
+    // FIXME: Is this machine dependent?
+    unsigned SectionDataPadding = OffsetToAlignment(SectionDataFileSize, 4);
+    SectionDataFileSize += SectionDataPadding;
+
+    // Write the prolog, starting with the header and load command...
+    WriteHeader(NumLoadCommands, LoadCommandsSize,
+                Asm.getSubsectionsViaSymbols());
+    WriteSegmentLoadCommand(NumSections, VMSize,
+                            SectionDataStart, SectionDataSize);
+
+    // ... and then the section headers.
+    uint64_t RelocTableEnd = SectionDataStart + SectionDataFileSize;
+    for (MCAssembler::const_iterator it = Asm.begin(),
+           ie = Asm.end(); it != ie; ++it) {
+      std::vector<MachRelocationEntry> &Relocs = Relocations[it];
+      unsigned NumRelocs = Relocs.size();
+      uint64_t SectionStart = SectionDataStart + it->getAddress();
+      WriteSection(Asm, *it, SectionStart, RelocTableEnd, NumRelocs);
+      RelocTableEnd += NumRelocs * RelocationInfoSize;
+    }
+
+    // Write the symbol table load command, if used.
+    if (NumSymbols) {
+      unsigned FirstLocalSymbol = 0;
+      unsigned NumLocalSymbols = LocalSymbolData.size();
+      unsigned FirstExternalSymbol = FirstLocalSymbol + NumLocalSymbols;
+      unsigned NumExternalSymbols = ExternalSymbolData.size();
+      unsigned FirstUndefinedSymbol = FirstExternalSymbol + NumExternalSymbols;
+      unsigned NumUndefinedSymbols = UndefinedSymbolData.size();
+      unsigned NumIndirectSymbols = Asm.indirect_symbol_size();
+      unsigned NumSymTabSymbols =
+        NumLocalSymbols + NumExternalSymbols + NumUndefinedSymbols;
+      uint64_t IndirectSymbolSize = NumIndirectSymbols * 4;
+      uint64_t IndirectSymbolOffset = 0;
+
+      // If used, the indirect symbols are written after the section data.
+      if (NumIndirectSymbols)
+        IndirectSymbolOffset = RelocTableEnd;
+
+      // The symbol table is written after the indirect symbol data.
+      uint64_t SymbolTableOffset = RelocTableEnd + IndirectSymbolSize;
+
+      // The string table is written after symbol table.
+      uint64_t StringTableOffset =
+        SymbolTableOffset + NumSymTabSymbols * (Is64Bit ? Nlist64Size :
+                                                Nlist32Size);
+      WriteSymtabLoadCommand(SymbolTableOffset, NumSymTabSymbols,
+                             StringTableOffset, StringTable.size());
+
+      WriteDysymtabLoadCommand(FirstLocalSymbol, NumLocalSymbols,
+                               FirstExternalSymbol, NumExternalSymbols,
+                               FirstUndefinedSymbol, NumUndefinedSymbols,
+                               IndirectSymbolOffset, NumIndirectSymbols);
+    }
+
+    // Write the actual section data.
+    for (MCAssembler::const_iterator it = Asm.begin(),
+           ie = Asm.end(); it != ie; ++it)
+      Asm.WriteSectionData(it, Writer);
+
+    // Write the extra padding.
+    WriteZeros(SectionDataPadding);
+
+    // Write the relocation entries.
+    for (MCAssembler::const_iterator it = Asm.begin(),
+           ie = Asm.end(); it != ie; ++it) {
+      // Write the section relocation entries, in reverse order to match 'as'
+      // (approximately, the exact algorithm is more complicated than this).
+      std::vector<MachRelocationEntry> &Relocs = Relocations[it];
+      for (unsigned i = 0, e = Relocs.size(); i != e; ++i) {
+        Write32(Relocs[e - i - 1].Word0);
+        Write32(Relocs[e - i - 1].Word1);
+      }
+    }
+
+    // Write the symbol table data, if used.
+    if (NumSymbols) {
+      // Write the indirect symbol entries.
+      for (MCAssembler::const_indirect_symbol_iterator
+             it = Asm.indirect_symbol_begin(),
+             ie = Asm.indirect_symbol_end(); it != ie; ++it) {
+        // Indirect symbols in the non lazy symbol pointer section have some
+        // special handling.
+        const MCSectionMachO &Section =
+          static_cast<const MCSectionMachO&>(it->SectionData->getSection());
+        if (Section.getType() == MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS) {
+          // If this symbol is defined and internal, mark it as such.
+          if (it->Symbol->isDefined() &&
+              !Asm.getSymbolData(*it->Symbol).isExternal()) {
+            uint32_t Flags = ISF_Local;
+            if (it->Symbol->isAbsolute())
+              Flags |= ISF_Absolute;
+            Write32(Flags);
+            continue;
+          }
+        }
+
+        Write32(Asm.getSymbolData(*it->Symbol).getIndex());
+      }
+
+      // FIXME: Check that offsets match computed ones.
+
+      // Write the symbol table entries.
+      for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i)
+        WriteNlist(LocalSymbolData[i]);
+      for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i)
+        WriteNlist(ExternalSymbolData[i]);
+      for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i)
+        WriteNlist(UndefinedSymbolData[i]);
+
+      // Write the string table.
+      OS << StringTable.str();
+    }
+  }
+};
+
+}
+
+MachObjectWriter::MachObjectWriter(raw_ostream &OS,
+                                   bool Is64Bit,
+                                   bool IsLittleEndian)
+  : MCObjectWriter(OS, IsLittleEndian)
+{
+  Impl = new MachObjectWriterImpl(this, Is64Bit);
+}
+
+MachObjectWriter::~MachObjectWriter() {
+  delete (MachObjectWriterImpl*) Impl;
+}
+
+void MachObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm) {
+  ((MachObjectWriterImpl*) Impl)->ExecutePostLayoutBinding(Asm);
+}
+
+void MachObjectWriter::RecordRelocation(const MCAssembler &Asm,
+                                        const MCDataFragment &Fragment,
+                                        const MCAsmFixup &Fixup, MCValue Target,
+                                        uint64_t &FixedValue) {
+  ((MachObjectWriterImpl*) Impl)->RecordRelocation(Asm, Fragment, Fixup,
+                                                   Target, FixedValue);
+}
+
+void MachObjectWriter::WriteObject(const MCAssembler &Asm) {
+  ((MachObjectWriterImpl*) Impl)->WriteObject(Asm);
+}
diff --git a/lib/MC/TargetAsmBackend.cpp b/lib/MC/TargetAsmBackend.cpp
index 918d272..bbfddbe 100644
--- a/lib/MC/TargetAsmBackend.cpp
+++ b/lib/MC/TargetAsmBackend.cpp
@@ -11,7 +11,10 @@
 using namespace llvm;
 
 TargetAsmBackend::TargetAsmBackend(const Target &T)
-  : TheTarget(T)
+  : TheTarget(T),
+    HasAbsolutizedSet(false),
+    HasReliableSymbolDifference(false),
+    HasScatteredSymbols(false)
 {
 }
 
diff --git a/lib/Support/FoldingSet.cpp b/lib/Support/FoldingSet.cpp
index 954dc77..3f467fe 100644
--- a/lib/Support/FoldingSet.cpp
+++ b/lib/Support/FoldingSet.cpp
@@ -15,6 +15,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/ADT/FoldingSet.h"
+#include "llvm/Support/Allocator.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
 #include <cassert>
@@ -130,6 +131,15 @@ bool FoldingSetNodeID::operator==(const FoldingSetNodeID &RHS)const{
   return memcmp(&Bits[0], &RHS.Bits[0], Bits.size()*sizeof(Bits[0])) == 0;
 }
 
+/// Intern - Copy this node's data to a memory region allocated from the
+/// given allocator and return a FoldingSetNodeIDRef describing the
+/// interned data.
+FoldingSetNodeIDRef
+FoldingSetNodeID::Intern(BumpPtrAllocator &Allocator) const {
+  unsigned *New = Allocator.Allocate<unsigned>(Bits.size());
+  std::uninitialized_copy(Bits.begin(), Bits.end(), New);
+  return FoldingSetNodeIDRef(New, Bits.size());
+}
 
 //===----------------------------------------------------------------------===//
 /// Helper functions for FoldingSetImpl.
diff --git a/lib/System/Unix/Signals.inc b/lib/System/Unix/Signals.inc
index c8ec68a..56bf9e7 100644
--- a/lib/System/Unix/Signals.inc
+++ b/lib/System/Unix/Signals.inc
@@ -39,8 +39,8 @@ static SmartMutex<true> SignalsMutex;
 /// InterruptFunction - The function to call if ctrl-c is pressed.
 static void (*InterruptFunction)() = 0;
 
-static std::vector<sys::Path> *FilesToRemove = 0;
-static std::vector<std::pair<void(*)(void*), void*> > *CallBacksToRun = 0;
+static std::vector<sys::Path> FilesToRemove;
+static std::vector<std::pair<void(*)(void*), void*> > CallBacksToRun;
 
 // IntSigs - Signals that may interrupt the program at any time.
 static const int IntSigs[] = {
@@ -126,11 +126,10 @@ static RETSIGTYPE SignalHandler(int Sig) {
   sigprocmask(SIG_UNBLOCK, &SigMask, 0);
 
   SignalsMutex.acquire();
-  if (FilesToRemove != 0)
-    while (!FilesToRemove->empty()) {
-      FilesToRemove->back().eraseFromDisk(true);
-      FilesToRemove->pop_back();
-    }
+  while (!FilesToRemove.empty()) {
+    FilesToRemove.back().eraseFromDisk(true);
+    FilesToRemove.pop_back();
+  }
 
   if (std::find(IntSigs, IntSigsEnd, Sig) != IntSigsEnd) {
     if (InterruptFunction) {
@@ -149,9 +148,8 @@ static RETSIGTYPE SignalHandler(int Sig) {
   SignalsMutex.release();
 
   // Otherwise if it is a fault (like SEGV) run any handler.
-  if (CallBacksToRun)
-    for (unsigned i = 0, e = CallBacksToRun->size(); i != e; ++i)
-      (*CallBacksToRun)[i].first((*CallBacksToRun)[i].second);
+  for (unsigned i = 0, e = CallBacksToRun.size(); i != e; ++i)
+    CallBacksToRun[i].first(CallBacksToRun[i].second);
 }
 
 
@@ -167,10 +165,7 @@ void llvm::sys::SetInterruptFunction(void (*IF)()) {
 bool llvm::sys::RemoveFileOnSignal(const sys::Path &Filename,
                                    std::string* ErrMsg) {
   SignalsMutex.acquire();
-  if (FilesToRemove == 0)
-    FilesToRemove = new std::vector<sys::Path>();
-
-  FilesToRemove->push_back(Filename);
+  FilesToRemove.push_back(Filename);
 
   SignalsMutex.release();
 
@@ -182,9 +177,7 @@ bool llvm::sys::RemoveFileOnSignal(const sys::Path &Filename,
 /// to the process.  The handler can have a cookie passed to it to identify
 /// what instance of the handler it is.
 void llvm::sys::AddSignalHandler(void (*FnPtr)(void *), void *Cookie) {
-  if (CallBacksToRun == 0)
-    CallBacksToRun = new std::vector<std::pair<void(*)(void*), void*> >();
-  CallBacksToRun->push_back(std::make_pair(FnPtr, Cookie));
+  CallBacksToRun.push_back(std::make_pair(FnPtr, Cookie));
   RegisterHandlers();
 }
 
diff --git a/lib/Target/ARM/ARMAddressingModes.h b/lib/Target/ARM/ARMAddressingModes.h
index ddeb1b9..ea62c33 100644
--- a/lib/Target/ARM/ARMAddressingModes.h
+++ b/lib/Target/ARM/ARMAddressingModes.h
@@ -35,6 +35,10 @@ namespace ARM_AM {
     add = '+', sub = '-'
   };
 
+  static inline const char *getAddrOpcStr(AddrOpc Op) {
+    return Op == sub ? "-" : "";
+  }
+
   static inline const char *getShiftOpcStr(ShiftOpc Op) {
     switch (Op) {
     default: assert(0 && "Unknown shift opc!");
@@ -78,16 +82,6 @@ namespace ARM_AM {
     }
   }
 
-  static inline const char *getAMSubModeAltStr(AMSubMode Mode, bool isLD) {
-    switch (Mode) {
-    default: assert(0 && "Unknown addressing sub-mode!");
-    case ARM_AM::ia: return isLD ? "fd" : "ea";
-    case ARM_AM::ib: return isLD ? "ed" : "fa";
-    case ARM_AM::da: return isLD ? "fa" : "ed";
-    case ARM_AM::db: return isLD ? "ea" : "fd";
-    }
-  }
-
   /// rotr32 - Rotate a 32-bit unsigned value right by a specified # bits.
   ///
   static inline unsigned rotr32(unsigned Val, unsigned Amt) {
@@ -473,20 +467,13 @@ namespace ARM_AM {
   //    IB - Increment before
   //    DA - Decrement after
   //    DB - Decrement before
-  //
-  // If the 4th bit (writeback)is set, then the base register is updated after
-  // the memory transfer.
 
   static inline AMSubMode getAM4SubMode(unsigned Mode) {
     return (AMSubMode)(Mode & 0x7);
   }
 
-  static inline unsigned getAM4ModeImm(AMSubMode SubMode, bool WB = false) {
-    return (int)SubMode | ((int)WB << 3);
-  }
-
-  static inline bool getAM4WBFlag(unsigned Mode) {
-    return (Mode >> 3) & 1;
+  static inline unsigned getAM4ModeImm(AMSubMode SubMode) {
+    return (int)SubMode;
   }
 
   //===--------------------------------------------------------------------===//
@@ -501,9 +488,9 @@ namespace ARM_AM {
   // operation in bit 8 and the immediate in bits 0-7.
   //
   // This is also used for FP load/store multiple ops. The second operand
-  // encodes the writeback mode in bit 8 and the number of registers (or 2
-  // times the number of registers for DPR ops) in bits 0-7. In addition,
-  // bits 9-11 encode one of the following two sub-modes:
+  // encodes the number of registers (or 2 times the number of registers
+  // for DPR ops) in bits 0-7. In addition, bits 8-10 encode one of the
+  // following two sub-modes:
   //
   //    IA - Increment after
   //    DB - Decrement before
@@ -522,17 +509,13 @@ namespace ARM_AM {
 
   /// getAM5Opc - This function encodes the addrmode5 opc field for VLDM and
   /// VSTM instructions.
-  static inline unsigned getAM5Opc(AMSubMode SubMode, bool WB,
-                                   unsigned char Offset) {
+  static inline unsigned getAM5Opc(AMSubMode SubMode, unsigned char Offset) {
     assert((SubMode == ia || SubMode == db) &&
            "Illegal addressing mode 5 sub-mode!");
-    return ((int)SubMode << 9) | ((int)WB << 8) | Offset;
+    return ((int)SubMode << 8) | Offset;
   }
   static inline AMSubMode getAM5SubMode(unsigned AM5Opc) {
-    return (AMSubMode)((AM5Opc >> 9) & 0x7);
-  }
-  static inline bool getAM5WBFlag(unsigned AM5Opc) {
-    return ((AM5Opc >> 8) & 1);
+    return (AMSubMode)((AM5Opc >> 8) & 0x7);
   }
 
   //===--------------------------------------------------------------------===//
@@ -541,23 +524,11 @@ namespace ARM_AM {
   //
   // This is used for NEON load / store instructions.
   //
-  // addrmode6 := reg with optional writeback and alignment
+  // addrmode6 := reg with optional alignment
   //
-  // This is stored in four operands [regaddr, regupdate, opc, align].  The
-  // first is the address register.  The second register holds the value of
-  // a post-access increment for writeback or reg0 if no writeback or if the
-  // writeback increment is the size of the memory access.  The third
-  // operand encodes whether there is writeback to the address register. The
-  // fourth operand is the value of the alignment specifier to use or zero if
-  // no explicit alignment.
-
-  static inline unsigned getAM6Opc(bool WB = false) {
-    return (int)WB;
-  }
-
-  static inline bool getAM6WBFlag(unsigned Mode) {
-    return Mode & 1;
-  }
+  // This is stored in two operands [regaddr, align].  The first is the
+  // address register.  The second operand is the value of the alignment
+  // specifier to use or zero if no explicit alignment.
 
 } // end namespace ARM_AM
 } // end namespace llvm
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 8e537d8..e6ea03a 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -650,39 +650,49 @@ ARMBaseInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
   if (SrcRC == ARM::tGPRRegisterClass)
     SrcRC = ARM::GPRRegisterClass;
 
-  if (DestRC != SrcRC) {
-    if (DestRC->getSize() != SrcRC->getSize())
-      return false;
+  // Allow DPR / DPR_VFP2 / DPR_8 cross-class copies.
+  if (DestRC == ARM::DPR_8RegisterClass)
+    DestRC = ARM::DPR_VFP2RegisterClass;
+  if (SrcRC == ARM::DPR_8RegisterClass)
+    SrcRC = ARM::DPR_VFP2RegisterClass;
+
+  // Allow QPR / QPR_VFP2 / QPR_8 cross-class copies.
+  if (DestRC == ARM::QPR_VFP2RegisterClass ||
+      DestRC == ARM::QPR_8RegisterClass)
+    DestRC = ARM::QPRRegisterClass;
+  if (SrcRC == ARM::QPR_VFP2RegisterClass ||
+      SrcRC == ARM::QPR_8RegisterClass)
+    SrcRC = ARM::QPRRegisterClass;
+
+  // Disallow copies of unequal sizes.
+  if (DestRC != SrcRC && DestRC->getSize() != SrcRC->getSize())
+    return false;
 
-    // Allow DPR / DPR_VFP2 / DPR_8 cross-class copies.
-    // Allow QPR / QPR_VFP2 / QPR_8 cross-class copies.
-    if (DestRC->getSize() != 8 && DestRC->getSize() != 16)
+  if (DestRC == ARM::GPRRegisterClass) {
+    if (SrcRC == ARM::SPRRegisterClass)
+      AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VMOVRS), DestReg)
+                     .addReg(SrcReg));
+    else
+      AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::MOVr),
+                                          DestReg).addReg(SrcReg)));
+  } else {
+    unsigned Opc;
+
+    if (DestRC == ARM::SPRRegisterClass)
+      Opc = (SrcRC == ARM::GPRRegisterClass ? ARM::VMOVSR : ARM::VMOVS);
+    else if (DestRC == ARM::DPRRegisterClass)
+      Opc = ARM::VMOVD;
+    else if (DestRC == ARM::DPR_VFP2RegisterClass ||
+             SrcRC == ARM::DPR_VFP2RegisterClass)
+      // Always use neon reg-reg move if source or dest is NEON-only regclass.
+      Opc = ARM::VMOVDneon;
+    else if (DestRC == ARM::QPRRegisterClass)
+      Opc = ARM::VMOVQ;
+    else
       return false;
-  }
 
-  if (DestRC == ARM::GPRRegisterClass) {
-    AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::MOVr),
-                                        DestReg).addReg(SrcReg)));
-  } else if (DestRC == ARM::SPRRegisterClass) {
-    AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VMOVS), DestReg)
-                   .addReg(SrcReg));
-  } else if (DestRC == ARM::DPRRegisterClass) {
-    AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VMOVD), DestReg)
+    AddDefaultPred(BuildMI(MBB, I, DL, get(Opc), DestReg)
                    .addReg(SrcReg));
-  } else if (DestRC == ARM::DPR_VFP2RegisterClass ||
-             DestRC == ARM::DPR_8RegisterClass ||
-             SrcRC == ARM::DPR_VFP2RegisterClass ||
-             SrcRC == ARM::DPR_8RegisterClass) {
-    // Always use neon reg-reg move if source or dest is NEON-only regclass.
-    AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VMOVDneon),
-                           DestReg).addReg(SrcReg));
-  } else if (DestRC == ARM::QPRRegisterClass ||
-             DestRC == ARM::QPR_VFP2RegisterClass ||
-             DestRC == ARM::QPR_8RegisterClass) {
-    AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VMOVQ),
-                           DestReg).addReg(SrcReg));
-  } else {
-    return false;
   }
 
   return true;
@@ -727,10 +737,9 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
     assert((RC == ARM::QPRRegisterClass ||
             RC == ARM::QPR_VFP2RegisterClass) && "Unknown regclass!");
     // FIXME: Neon instructions should support predicates
-    if (Align >= 16
-        && (getRegisterInfo().canRealignStack(MF))) {
+    if (Align >= 16 && (getRegisterInfo().canRealignStack(MF))) {
       AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q64))
-                     .addFrameIndex(FI).addImm(0).addImm(0).addImm(128)
+                     .addFrameIndex(FI).addImm(128)
                      .addMemOperand(MMO)
                      .addReg(SrcReg, getKillRegState(isKill)));
     } else {
@@ -780,7 +789,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
     if (Align >= 16
         && (getRegisterInfo().canRealignStack(MF))) {
       AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg)
-                     .addFrameIndex(FI).addImm(0).addImm(0).addImm(128)
+                     .addFrameIndex(FI).addImm(128)
                      .addMemOperand(MMO));
     } else {
       AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRQ), DestReg)
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index 767d5ec..292c498 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -93,34 +93,34 @@ namespace ARMII {
     StMiscFrm     = 9  << FormShift,
     LdStMulFrm    = 10 << FormShift,
 
-    LdStExFrm     = 28 << FormShift,
+    LdStExFrm     = 11 << FormShift,
 
     // Miscellaneous arithmetic instructions
-    ArithMiscFrm  = 11 << FormShift,
+    ArithMiscFrm  = 12 << FormShift,
 
     // Extend instructions
-    ExtFrm        = 12 << FormShift,
+    ExtFrm        = 13 << FormShift,
 
     // VFP formats
-    VFPUnaryFrm   = 13 << FormShift,
-    VFPBinaryFrm  = 14 << FormShift,
-    VFPConv1Frm   = 15 << FormShift,
-    VFPConv2Frm   = 16 << FormShift,
-    VFPConv3Frm   = 17 << FormShift,
-    VFPConv4Frm   = 18 << FormShift,
-    VFPConv5Frm   = 19 << FormShift,
-    VFPLdStFrm    = 20 << FormShift,
-    VFPLdStMulFrm = 21 << FormShift,
-    VFPMiscFrm    = 22 << FormShift,
+    VFPUnaryFrm   = 14 << FormShift,
+    VFPBinaryFrm  = 15 << FormShift,
+    VFPConv1Frm   = 16 << FormShift,
+    VFPConv2Frm   = 17 << FormShift,
+    VFPConv3Frm   = 18 << FormShift,
+    VFPConv4Frm   = 19 << FormShift,
+    VFPConv5Frm   = 20 << FormShift,
+    VFPLdStFrm    = 21 << FormShift,
+    VFPLdStMulFrm = 22 << FormShift,
+    VFPMiscFrm    = 23 << FormShift,
 
     // Thumb format
-    ThumbFrm      = 23 << FormShift,
+    ThumbFrm      = 24 << FormShift,
 
     // NEON format
-    NEONFrm       = 24 << FormShift,
-    NEONGetLnFrm  = 25 << FormShift,
-    NEONSetLnFrm  = 26 << FormShift,
-    NEONDupFrm    = 27 << FormShift,
+    NEONFrm       = 25 << FormShift,
+    NEONGetLnFrm  = 26 << FormShift,
+    NEONSetLnFrm  = 27 << FormShift,
+    NEONDupFrm    = 28 << FormShift,
 
     //===------------------------------------------------------------------===//
     // Misc flags.
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 11e1c48..b380c95 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -80,7 +80,7 @@ unsigned ARMBaseRegisterInfo::getRegisterNumbering(unsigned RegEnum,
   case D23: return 23;
   case D24: return 24;
   case D25: return 25;
-  case D26: return 27;
+  case D26: return 26;
   case D27: return 27;
   case D28: return 28;
   case D29: return 29;
diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp
index 334c820..e7aa0c8 100644
--- a/lib/Target/ARM/ARMCodeEmitter.cpp
+++ b/lib/Target/ARM/ARMCodeEmitter.cpp
@@ -55,12 +55,12 @@ namespace {
     const std::vector<MachineConstantPoolEntry> *MCPEs;
     const std::vector<MachineJumpTableEntry> *MJTEs;
     bool IsPIC;
-    
+
     void getAnalysisUsage(AnalysisUsage &AU) const {
       AU.addRequired<MachineModuleInfo>();
       MachineFunctionPass::getAnalysisUsage(AU);
     }
-    
+
     static char ID;
   public:
     ARMCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce)
@@ -68,7 +68,7 @@ namespace {
         TD(tm.getTargetData()), TM(tm),
     MCE(mce), MCPEs(0), MJTEs(0),
     IsPIC(TM.getRelocationModel() == Reloc::PIC_) {}
-    
+
     /// getBinaryCodeForInstr - This function, generated by the
     /// CodeEmitterGenerator using TableGen, produces the binary encoding for
     /// machine instructions.
@@ -163,7 +163,7 @@ namespace {
 
 char ARMCodeEmitter::ID = 0;
 
-/// createARMJITCodeEmitterPass - Return a pass that emits the collected ARM 
+/// createARMJITCodeEmitterPass - Return a pass that emits the collected ARM
 /// code to the specified MCE object.
 FunctionPass *llvm::createARMJITCodeEmitterPass(ARMBaseTargetMachine &TM,
                                                 JITCodeEmitter &JCE) {
@@ -617,8 +617,7 @@ void ARMCodeEmitter::emitPseudoInstruction(const MachineInstr &MI) {
   }
 }
 
-unsigned ARMCodeEmitter::getMachineSoRegOpValue(
-                                                const MachineInstr &MI,
+unsigned ARMCodeEmitter::getMachineSoRegOpValue(const MachineInstr &MI,
                                                 const TargetInstrDesc &TID,
                                                 const MachineOperand &MO,
                                                 unsigned OpIdx) {
@@ -690,7 +689,7 @@ unsigned ARMCodeEmitter::getMachineSoImmOpValue(unsigned SoImm) {
 }
 
 unsigned ARMCodeEmitter::getAddrModeSBit(const MachineInstr &MI,
-                                             const TargetInstrDesc &TID) const {
+                                         const TargetInstrDesc &TID) const {
   for (unsigned i = MI.getNumOperands(), e = TID.getNumOperands(); i != e; --i){
     const MachineOperand &MO = MI.getOperand(i-1);
     if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR)
@@ -699,8 +698,7 @@ unsigned ARMCodeEmitter::getAddrModeSBit(const MachineInstr &MI,
   return 0;
 }
 
-void ARMCodeEmitter::emitDataProcessingInstruction(
-                                                   const MachineInstr &MI,
+void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI,
                                                    unsigned ImplicitRd,
                                                    unsigned ImplicitRn) {
   const TargetInstrDesc &TID = MI.getDesc();
@@ -765,8 +763,7 @@ void ARMCodeEmitter::emitDataProcessingInstruction(
   emitWordLE(Binary);
 }
 
-void ARMCodeEmitter::emitLoadStoreInstruction(
-                                              const MachineInstr &MI,
+void ARMCodeEmitter::emitLoadStoreInstruction(const MachineInstr &MI,
                                               unsigned ImplicitRd,
                                               unsigned ImplicitRn) {
   const TargetInstrDesc &TID = MI.getDesc();
@@ -841,7 +838,7 @@ void ARMCodeEmitter::emitLoadStoreInstruction(
 }
 
 void ARMCodeEmitter::emitMiscLoadStoreInstruction(const MachineInstr &MI,
-                                                        unsigned ImplicitRn) {
+                                                  unsigned ImplicitRn) {
   const TargetInstrDesc &TID = MI.getDesc();
   unsigned Form = TID.TSFlags & ARMII::FormMask;
   bool IsPrePost = (TID.TSFlags & ARMII::IndexModeMask) != 0;
@@ -950,7 +947,7 @@ void ARMCodeEmitter::emitLoadStoreMultipleInstruction(const MachineInstr &MI) {
   Binary |= getAddrModeUPBits(ARM_AM::getAM4SubMode(MO.getImm()));
 
   // Set bit W(21)
-  if (ARM_AM::getAM4WBFlag(MO.getImm()))
+  if (IsUpdating)
     Binary |= 0x1 << ARMII::W_BitShift;
 
   // Set registers
@@ -1238,8 +1235,7 @@ void ARMCodeEmitter::emitVFPArithInstruction(const MachineInstr &MI) {
   emitWordLE(Binary);
 }
 
-void ARMCodeEmitter::emitVFPConversionInstruction(
-      const MachineInstr &MI) {
+void ARMCodeEmitter::emitVFPConversionInstruction(const MachineInstr &MI) {
   const TargetInstrDesc &TID = MI.getDesc();
   unsigned Form = TID.TSFlags & ARMII::FormMask;
 
@@ -1329,8 +1325,8 @@ void ARMCodeEmitter::emitVFPLoadStoreInstruction(const MachineInstr &MI) {
   emitWordLE(Binary);
 }
 
-void ARMCodeEmitter::emitVFPLoadStoreMultipleInstruction(
-                                                       const MachineInstr &MI) {
+void
+ARMCodeEmitter::emitVFPLoadStoreMultipleInstruction(const MachineInstr &MI) {
   const TargetInstrDesc &TID = MI.getDesc();
   bool IsUpdating = (TID.TSFlags & ARMII::IndexModeMask) != 0;
 
@@ -1353,7 +1349,7 @@ void ARMCodeEmitter::emitVFPLoadStoreMultipleInstruction(
   Binary |= getAddrModeUPBits(ARM_AM::getAM5SubMode(MO.getImm()));
 
   // Set bit W(21)
-  if (ARM_AM::getAM5WBFlag(MO.getImm()))
+  if (IsUpdating)
     Binary |= 0x1 << ARMII::W_BitShift;
 
   // First register is encoded in Dd.
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 013e00a..71207c8 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -80,8 +80,7 @@ public:
                        SDValue &Mode);
   bool SelectAddrMode5(SDNode *Op, SDValue N, SDValue &Base,
                        SDValue &Offset);
-  bool SelectAddrMode6(SDNode *Op, SDValue N, SDValue &Addr, SDValue &Update,
-                       SDValue &Opc, SDValue &Align);
+  bool SelectAddrMode6(SDNode *Op, SDValue N, SDValue &Addr, SDValue &Align);
 
   bool SelectAddrModePC(SDNode *Op, SDValue N, SDValue &Offset,
                         SDValue &Label);
@@ -502,12 +501,8 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDNode *Op, SDValue N,
 }
 
 bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Op, SDValue N,
-                                      SDValue &Addr, SDValue &Update,
-                                      SDValue &Opc, SDValue &Align) {
+                                      SDValue &Addr, SDValue &Align) {
   Addr = N;
-  // Default to no writeback.
-  Update = CurDAG->getRegister(0, MVT::i32);
-  Opc = CurDAG->getTargetConstant(ARM_AM::getAM6Opc(false), MVT::i32);
   // Default to no alignment.
   Align = CurDAG->getTargetConstant(0, MVT::i32);
   return true;
@@ -1030,8 +1025,8 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
   assert(NumVecs >=2 && NumVecs <= 4 && "VLD NumVecs out-of-range");
   DebugLoc dl = N->getDebugLoc();
 
-  SDValue MemAddr, MemUpdate, MemOpc, Align;
-  if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, MemUpdate, MemOpc, Align))
+  SDValue MemAddr, Align;
+  if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, Align))
     return NULL;
 
   SDValue Chain = N->getOperand(0);
@@ -1055,14 +1050,13 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
   }
 
   SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32);
-  SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
+  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
   if (is64BitVector) {
     unsigned Opc = DOpcodes[OpcodeIndex];
-    const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Align,
-                            Pred, PredReg, Chain };
+    const SDValue Ops[] = { MemAddr, Align, Pred, Reg0, Chain };
     std::vector<EVT> ResTys(NumVecs, VT);
     ResTys.push_back(MVT::Other);
-    return CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 7);
+    return CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5);
   }
 
   EVT RegVT = GetNEONSubregVT(VT);
@@ -1070,11 +1064,10 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
     // Quad registers are directly supported for VLD2,
     // loading 2 pairs of D regs.
     unsigned Opc = QOpcodes0[OpcodeIndex];
-    const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Align,
-                            Pred, PredReg, Chain };
+    const SDValue Ops[] = { MemAddr, Align, Pred, Reg0, Chain };
     std::vector<EVT> ResTys(4, VT);
     ResTys.push_back(MVT::Other);
-    SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 7);
+    SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5);
     Chain = SDValue(VLd, 4);
 
     // Combine the even and odd subregs to produce the result.
@@ -1086,25 +1079,21 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
     // Otherwise, quad registers are loaded with two separate instructions,
     // where one loads the even registers and the other loads the odd registers.
 
-    // Enable writeback to the address register.
-    MemOpc = CurDAG->getTargetConstant(ARM_AM::getAM6Opc(true), MVT::i32);
-
     std::vector<EVT> ResTys(NumVecs, RegVT);
     ResTys.push_back(MemAddr.getValueType());
     ResTys.push_back(MVT::Other);
 
     // Load the even subregs.
     unsigned Opc = QOpcodes0[OpcodeIndex];
-    const SDValue OpsA[] = { MemAddr, MemUpdate, MemOpc, Align,
-                             Pred, PredReg, Chain };
-    SDNode *VLdA = CurDAG->getMachineNode(Opc, dl, ResTys, OpsA, 7);
+    const SDValue OpsA[] = { MemAddr, Align, Reg0, Pred, Reg0, Chain };
+    SDNode *VLdA = CurDAG->getMachineNode(Opc, dl, ResTys, OpsA, 6);
     Chain = SDValue(VLdA, NumVecs+1);
 
     // Load the odd subregs.
     Opc = QOpcodes1[OpcodeIndex];
-    const SDValue OpsB[] = { SDValue(VLdA, NumVecs), MemUpdate, MemOpc,
-                             Align, Pred, PredReg, Chain };
-    SDNode *VLdB = CurDAG->getMachineNode(Opc, dl, ResTys, OpsB, 7);
+    const SDValue OpsB[] = { SDValue(VLdA, NumVecs),
+                             Align, Reg0, Pred, Reg0, Chain };
+    SDNode *VLdB = CurDAG->getMachineNode(Opc, dl, ResTys, OpsB, 6);
     Chain = SDValue(VLdB, NumVecs+1);
 
     // Combine the even and odd subregs to produce the result.
@@ -1123,8 +1112,8 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
   assert(NumVecs >=2 && NumVecs <= 4 && "VST NumVecs out-of-range");
   DebugLoc dl = N->getDebugLoc();
 
-  SDValue MemAddr, MemUpdate, MemOpc, Align;
-  if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, MemUpdate, MemOpc, Align))
+  SDValue MemAddr, Align;
+  if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, Align))
     return NULL;
 
   SDValue Chain = N->getOperand(0);
@@ -1148,12 +1137,10 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
   }
 
   SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32);
-  SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
+  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
 
-  SmallVector<SDValue, 8> Ops;
+  SmallVector<SDValue, 10> Ops;
   Ops.push_back(MemAddr);
-  Ops.push_back(MemUpdate);
-  Ops.push_back(MemOpc);
   Ops.push_back(Align);
 
   if (is64BitVector) {
@@ -1161,9 +1148,9 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
     for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
       Ops.push_back(N->getOperand(Vec+3));
     Ops.push_back(Pred);
-    Ops.push_back(PredReg);
+    Ops.push_back(Reg0); // predicate register
     Ops.push_back(Chain);
-    return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+7);
+    return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+5);
   }
 
   EVT RegVT = GetNEONSubregVT(VT);
@@ -1178,40 +1165,37 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
                                                    N->getOperand(Vec+3)));
     }
     Ops.push_back(Pred);
-    Ops.push_back(PredReg);
+    Ops.push_back(Reg0); // predicate register
     Ops.push_back(Chain);
-    return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 11);
+    return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 9);
   }
 
   // Otherwise, quad registers are stored with two separate instructions,
   // where one stores the even registers and the other stores the odd registers.
 
-  // Enable writeback to the address register.
-  MemOpc = CurDAG->getTargetConstant(ARM_AM::getAM6Opc(true), MVT::i32);
+  Ops.push_back(Reg0); // post-access address offset
 
   // Store the even subregs.
   for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
     Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT,
                                                  N->getOperand(Vec+3)));
   Ops.push_back(Pred);
-  Ops.push_back(PredReg);
+  Ops.push_back(Reg0); // predicate register
   Ops.push_back(Chain);
   unsigned Opc = QOpcodes0[OpcodeIndex];
   SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
-                                        MVT::Other, Ops.data(), NumVecs+7);
+                                        MVT::Other, Ops.data(), NumVecs+6);
   Chain = SDValue(VStA, 1);
 
   // Store the odd subregs.
   Ops[0] = SDValue(VStA, 0); // MemAddr
   for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
-    Ops[Vec+4] = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT,
+    Ops[Vec+3] = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT,
                                                 N->getOperand(Vec+3));
-  Ops[NumVecs+4] = Pred;
-  Ops[NumVecs+5] = PredReg;
-  Ops[NumVecs+6] = Chain;
+  Ops[NumVecs+5] = Chain;
   Opc = QOpcodes1[OpcodeIndex];
   SDNode *VStB = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
-                                        MVT::Other, Ops.data(), NumVecs+7);
+                                        MVT::Other, Ops.data(), NumVecs+6);
   Chain = SDValue(VStB, 1);
   ReplaceUses(SDValue(N, 0), Chain);
   return NULL;
@@ -1224,8 +1208,8 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
   assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
   DebugLoc dl = N->getDebugLoc();
 
-  SDValue MemAddr, MemUpdate, MemOpc, Align;
-  if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, MemUpdate, MemOpc, Align))
+  SDValue MemAddr, Align;
+  if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, Align))
     return NULL;
 
   SDValue Chain = N->getOperand(0);
@@ -1259,12 +1243,10 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
   }
 
   SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32);
-  SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
+  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
 
-  SmallVector<SDValue, 9> Ops;
+  SmallVector<SDValue, 10> Ops;
   Ops.push_back(MemAddr);
-  Ops.push_back(MemUpdate);
-  Ops.push_back(MemOpc);
   Ops.push_back(Align);
 
   unsigned Opc = 0;
@@ -1287,16 +1269,16 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
   }
   Ops.push_back(getI32Imm(Lane));
   Ops.push_back(Pred);
-  Ops.push_back(PredReg);
+  Ops.push_back(Reg0);
   Ops.push_back(Chain);
 
   if (!IsLoad)
-    return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+8);
+    return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+6);
 
   std::vector<EVT> ResTys(NumVecs, RegVT);
   ResTys.push_back(MVT::Other);
   SDNode *VLdLn =
-    CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), NumVecs+8);
+    CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), NumVecs+6);
   // For a 64-bit vector load to D registers, nothing more needs to be done.
   if (is64BitVector)
     return VLdLn;
@@ -1859,37 +1841,45 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
     case Intrinsic::arm_neon_vld3: {
       unsigned DOpcodes[] = { ARM::VLD3d8, ARM::VLD3d16,
                               ARM::VLD3d32, ARM::VLD3d64 };
-      unsigned QOpcodes0[] = { ARM::VLD3q8a, ARM::VLD3q16a, ARM::VLD3q32a };
-      unsigned QOpcodes1[] = { ARM::VLD3q8b, ARM::VLD3q16b, ARM::VLD3q32b };
+      unsigned QOpcodes0[] = { ARM::VLD3q8_UPD,
+                               ARM::VLD3q16_UPD,
+                               ARM::VLD3q32_UPD };
+      unsigned QOpcodes1[] = { ARM::VLD3q8odd_UPD,
+                               ARM::VLD3q16odd_UPD,
+                               ARM::VLD3q32odd_UPD };
       return SelectVLD(N, 3, DOpcodes, QOpcodes0, QOpcodes1);
     }
 
     case Intrinsic::arm_neon_vld4: {
       unsigned DOpcodes[] = { ARM::VLD4d8, ARM::VLD4d16,
                               ARM::VLD4d32, ARM::VLD4d64 };
-      unsigned QOpcodes0[] = { ARM::VLD4q8a, ARM::VLD4q16a, ARM::VLD4q32a };
-      unsigned QOpcodes1[] = { ARM::VLD4q8b, ARM::VLD4q16b, ARM::VLD4q32b };
+      unsigned QOpcodes0[] = { ARM::VLD4q8_UPD,
+                               ARM::VLD4q16_UPD,
+                               ARM::VLD4q32_UPD };
+      unsigned QOpcodes1[] = { ARM::VLD4q8odd_UPD,
+                               ARM::VLD4q16odd_UPD,
+                               ARM::VLD4q32odd_UPD };
       return SelectVLD(N, 4, DOpcodes, QOpcodes0, QOpcodes1);
     }
 
     case Intrinsic::arm_neon_vld2lane: {
       unsigned DOpcodes[] = { ARM::VLD2LNd8, ARM::VLD2LNd16, ARM::VLD2LNd32 };
-      unsigned QOpcodes0[] = { ARM::VLD2LNq16a, ARM::VLD2LNq32a };
-      unsigned QOpcodes1[] = { ARM::VLD2LNq16b, ARM::VLD2LNq32b };
+      unsigned QOpcodes0[] = { ARM::VLD2LNq16, ARM::VLD2LNq32 };
+      unsigned QOpcodes1[] = { ARM::VLD2LNq16odd, ARM::VLD2LNq32odd };
       return SelectVLDSTLane(N, true, 2, DOpcodes, QOpcodes0, QOpcodes1);
     }
 
     case Intrinsic::arm_neon_vld3lane: {
       unsigned DOpcodes[] = { ARM::VLD3LNd8, ARM::VLD3LNd16, ARM::VLD3LNd32 };
-      unsigned QOpcodes0[] = { ARM::VLD3LNq16a, ARM::VLD3LNq32a };
-      unsigned QOpcodes1[] = { ARM::VLD3LNq16b, ARM::VLD3LNq32b };
+      unsigned QOpcodes0[] = { ARM::VLD3LNq16, ARM::VLD3LNq32 };
+      unsigned QOpcodes1[] = { ARM::VLD3LNq16odd, ARM::VLD3LNq32odd };
       return SelectVLDSTLane(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
     }
 
     case Intrinsic::arm_neon_vld4lane: {
       unsigned DOpcodes[] = { ARM::VLD4LNd8, ARM::VLD4LNd16, ARM::VLD4LNd32 };
-      unsigned QOpcodes0[] = { ARM::VLD4LNq16a, ARM::VLD4LNq32a };
-      unsigned QOpcodes1[] = { ARM::VLD4LNq16b, ARM::VLD4LNq32b };
+      unsigned QOpcodes0[] = { ARM::VLD4LNq16, ARM::VLD4LNq32 };
+      unsigned QOpcodes1[] = { ARM::VLD4LNq16odd, ARM::VLD4LNq32odd };
       return SelectVLDSTLane(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
     }
 
@@ -1903,37 +1893,45 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
     case Intrinsic::arm_neon_vst3: {
       unsigned DOpcodes[] = { ARM::VST3d8, ARM::VST3d16,
                               ARM::VST3d32, ARM::VST3d64 };
-      unsigned QOpcodes0[] = { ARM::VST3q8a, ARM::VST3q16a, ARM::VST3q32a };
-      unsigned QOpcodes1[] = { ARM::VST3q8b, ARM::VST3q16b, ARM::VST3q32b };
+      unsigned QOpcodes0[] = { ARM::VST3q8_UPD,
+                               ARM::VST3q16_UPD,
+                               ARM::VST3q32_UPD };
+      unsigned QOpcodes1[] = { ARM::VST3q8odd_UPD,
+                               ARM::VST3q16odd_UPD,
+                               ARM::VST3q32odd_UPD };
       return SelectVST(N, 3, DOpcodes, QOpcodes0, QOpcodes1);
     }
 
     case Intrinsic::arm_neon_vst4: {
       unsigned DOpcodes[] = { ARM::VST4d8, ARM::VST4d16,
                               ARM::VST4d32, ARM::VST4d64 };
-      unsigned QOpcodes0[] = { ARM::VST4q8a, ARM::VST4q16a, ARM::VST4q32a };
-      unsigned QOpcodes1[] = { ARM::VST4q8b, ARM::VST4q16b, ARM::VST4q32b };
+      unsigned QOpcodes0[] = { ARM::VST4q8_UPD,
+                               ARM::VST4q16_UPD,
+                               ARM::VST4q32_UPD };
+      unsigned QOpcodes1[] = { ARM::VST4q8odd_UPD,
+                               ARM::VST4q16odd_UPD,
+                               ARM::VST4q32odd_UPD };
       return SelectVST(N, 4, DOpcodes, QOpcodes0, QOpcodes1);
     }
 
     case Intrinsic::arm_neon_vst2lane: {
       unsigned DOpcodes[] = { ARM::VST2LNd8, ARM::VST2LNd16, ARM::VST2LNd32 };
-      unsigned QOpcodes0[] = { ARM::VST2LNq16a, ARM::VST2LNq32a };
-      unsigned QOpcodes1[] = { ARM::VST2LNq16b, ARM::VST2LNq32b };
+      unsigned QOpcodes0[] = { ARM::VST2LNq16, ARM::VST2LNq32 };
+      unsigned QOpcodes1[] = { ARM::VST2LNq16odd, ARM::VST2LNq32odd };
       return SelectVLDSTLane(N, false, 2, DOpcodes, QOpcodes0, QOpcodes1);
     }
 
     case Intrinsic::arm_neon_vst3lane: {
       unsigned DOpcodes[] = { ARM::VST3LNd8, ARM::VST3LNd16, ARM::VST3LNd32 };
-      unsigned QOpcodes0[] = { ARM::VST3LNq16a, ARM::VST3LNq32a };
-      unsigned QOpcodes1[] = { ARM::VST3LNq16b, ARM::VST3LNq32b };
+      unsigned QOpcodes0[] = { ARM::VST3LNq16, ARM::VST3LNq32 };
+      unsigned QOpcodes1[] = { ARM::VST3LNq16odd, ARM::VST3LNq32odd };
       return SelectVLDSTLane(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
     }
 
     case Intrinsic::arm_neon_vst4lane: {
       unsigned DOpcodes[] = { ARM::VST4LNd8, ARM::VST4LNd16, ARM::VST4LNd32 };
-      unsigned QOpcodes0[] = { ARM::VST4LNq16a, ARM::VST4LNq32a };
-      unsigned QOpcodes1[] = { ARM::VST4LNq16b, ARM::VST4LNq32b };
+      unsigned QOpcodes0[] = { ARM::VST4LNq16, ARM::VST4LNq32 };
+      unsigned QOpcodes1[] = { ARM::VST4LNq16odd, ARM::VST4LNq32odd };
       return SelectVLDSTLane(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
     }
     }
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 8f20843..0d0a004 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -436,9 +436,9 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
       setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
     }
     // Special handling for half-precision FP.
-    if (Subtarget->hasVFP3() && Subtarget->hasFP16()) {
-      setOperationAction(ISD::FP16_TO_FP32, MVT::f32, Custom);
-      setOperationAction(ISD::FP32_TO_FP16, MVT::i32, Custom);
+    if (!Subtarget->hasFP16()) {
+      setOperationAction(ISD::FP16_TO_FP32, MVT::f32, Expand);
+      setOperationAction(ISD::FP32_TO_FP16, MVT::i32, Expand);
     }
   }
 
@@ -499,8 +499,6 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case ARMISD::FTOUI:         return "ARMISD::FTOUI";
   case ARMISD::SITOF:         return "ARMISD::SITOF";
   case ARMISD::UITOF:         return "ARMISD::UITOF";
-  case ARMISD::F16_TO_F32:    return "ARMISD::F16_TO_F32";
-  case ARMISD::F32_TO_F16:    return "ARMISD::F32_TO_F16";
 
   case ARMISD::SRL_FLAG:      return "ARMISD::SRL_FLAG";
   case ARMISD::SRA_FLAG:      return "ARMISD::SRA_FLAG";
@@ -1987,9 +1985,6 @@ static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
   switch (Op.getOpcode()) {
   default:
     assert(0 && "Invalid opcode!");
-  case ISD::FP32_TO_FP16:
-    Opc = ARMISD::F32_TO_F16;
-    break;
   case ISD::FP_TO_SINT:
     Opc = ARMISD::FTOSI;
     break;
@@ -2009,9 +2004,6 @@ static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
   switch (Op.getOpcode()) {
   default:
     assert(0 && "Invalid opcode!");
-  case ISD::FP16_TO_FP32:
-    Opc = ARMISD::F16_TO_F32;
-    break;
   case ISD::SINT_TO_FP:
     Opc = ARMISD::SITOF;
     break;
@@ -3078,10 +3070,8 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
   case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
   case ISD::VASTART:       return LowerVASTART(Op, DAG, VarArgsFrameIndex);
   case ISD::MEMBARRIER:    return LowerMEMBARRIER(Op, DAG, Subtarget);
-  case ISD::FP16_TO_FP32:
   case ISD::SINT_TO_FP:
   case ISD::UINT_TO_FP:    return LowerINT_TO_FP(Op, DAG);
-  case ISD::FP32_TO_FP16:
   case ISD::FP_TO_SINT:
   case ISD::FP_TO_UINT:    return LowerFP_TO_INT(Op, DAG);
   case ISD::FCOPYSIGN:     return LowerFCOPYSIGN(Op, DAG);
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index d7b2ba3..f8f8adc 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -59,8 +59,6 @@ namespace llvm {
       FTOUI,        // FP to uint within a FP register.
       SITOF,        // sint to FP within a FP register.
       UITOF,        // uint to FP within a FP register.
-      F16_TO_F32,   // Half FP to single FP within a FP register.
-      F32_TO_F16,   // Single FP to half FP within a FP register.
 
       SRL_FLAG,     // V,Flag = srl_flag X -> srl X, 1 + save carry out.
       SRA_FLAG,     // V,Flag = sra_flag X -> sra X, 1 + save carry out.
diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td
index 258a96b..4f6f05d 100644
--- a/lib/Target/ARM/ARMInstrFormats.td
+++ b/lib/Target/ARM/ARMInstrFormats.td
@@ -15,8 +15,8 @@
 // Format specifies the encoding used by the instruction.  This is part of the
 // ad-hoc solution used to emit machine instruction encodings by our machine
 // code emitter.
-class Format<bits<5> val> {
-  bits<5> Value = val;
+class Format<bits<6> val> {
+  bits<6> Value = val;
 }
 
 def Pseudo        : Format<0>;
@@ -33,32 +33,34 @@ def LdMiscFrm     : Format<8>;
 def StMiscFrm     : Format<9>;
 def LdStMulFrm    : Format<10>;
 
-def LdStExFrm     : Format<28>;
+def LdStExFrm     : Format<11>;
 
-def ArithMiscFrm  : Format<11>;
-def ExtFrm        : Format<12>;
+def ArithMiscFrm  : Format<12>;
+def ExtFrm        : Format<13>;
 
-def VFPUnaryFrm   : Format<13>;
-def VFPBinaryFrm  : Format<14>;
-def VFPConv1Frm   : Format<15>;
-def VFPConv2Frm   : Format<16>;
-def VFPConv3Frm   : Format<17>;
-def VFPConv4Frm   : Format<18>;
-def VFPConv5Frm   : Format<19>;
-def VFPLdStFrm    : Format<20>;
-def VFPLdStMulFrm : Format<21>;
-def VFPMiscFrm    : Format<22>;
+def VFPUnaryFrm   : Format<14>;
+def VFPBinaryFrm  : Format<15>;
+def VFPConv1Frm   : Format<16>;
+def VFPConv2Frm   : Format<17>;
+def VFPConv3Frm   : Format<18>;
+def VFPConv4Frm   : Format<19>;
+def VFPConv5Frm   : Format<20>;
+def VFPLdStFrm    : Format<21>;
+def VFPLdStMulFrm : Format<22>;
+def VFPMiscFrm    : Format<23>;
 
-def ThumbFrm      : Format<23>;
+def ThumbFrm      : Format<24>;
 
-def NEONFrm       : Format<24>;
-def NEONGetLnFrm  : Format<25>;
-def NEONSetLnFrm  : Format<26>;
-def NEONDupFrm    : Format<27>;
+def NEONFrm       : Format<25>;
+def NEONGetLnFrm  : Format<26>;
+def NEONSetLnFrm  : Format<27>;
+def NEONDupFrm    : Format<28>;
 
 def MiscFrm       : Format<29>;
 def ThumbMiscFrm  : Format<30>;
 
+def NLdStFrm      : Format<31>;
+
 // Misc flags.
 
 // the instruction has a Rn register operand.
@@ -71,7 +73,7 @@ class UnaryDP    { bit isUnaryDataProc = 1; }
 class Xform16Bit { bit canXformTo16Bit = 1; }
 
 //===----------------------------------------------------------------------===//
-// ARM Instruction flags.  These need to match ARMInstrInfo.h.
+// ARM Instruction flags.  These need to match ARMBaseInstrInfo.h.
 //
 
 // Addressing mode.
@@ -183,7 +185,7 @@ class InstTemplate<AddrMode am, SizeFlagVal sz, IndexMode im,
   bits<2> IndexModeBits = IM.Value;
   
   Format F = f;
-  bits<5> Form = F.Value;
+  bits<6> Form = F.Value;
 
   Domain D = d;
   bits<2> Dom = D.Value;
@@ -229,7 +231,7 @@ class I<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
         list<dag> pattern>
   : InstARM<am, sz, im, f, GenericDomain, cstr, itin> {
   let OutOperandList = oops;
-  let InOperandList = !con(iops, (ops pred:$p));
+  let InOperandList = !con(iops, (ins pred:$p));
   let AsmString   = !strconcat(opc, !strconcat("${p}", asm));
   let Pattern = pattern;
   list<Predicate> Predicates = [IsARM];
@@ -257,7 +259,7 @@ class sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
          list<dag> pattern>
   : InstARM<am, sz, im, f, GenericDomain, cstr, itin> {
   let OutOperandList = oops;
-  let InOperandList = !con(iops, (ops pred:$p, cc_out:$s));
+  let InOperandList = !con(iops, (ins pred:$p, cc_out:$s));
   let AsmString   = !strconcat(opc, !strconcat("${p}${s}", asm));
   let Pattern = pattern;
   list<Predicate> Predicates = [IsARM];
@@ -1007,8 +1009,8 @@ class Thumb1sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
                InstrItinClass itin,
                string opc, string asm, string cstr, list<dag> pattern>
   : InstThumb<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
-  let OutOperandList = !con(oops, (ops s_cc_out:$s));
-  let InOperandList = !con(iops, (ops pred:$p));
+  let OutOperandList = !con(oops, (outs s_cc_out:$s));
+  let InOperandList = !con(iops, (ins pred:$p));
   let AsmString = !strconcat(opc, !strconcat("${s}${p}", asm));
   let Pattern = pattern;
   list<Predicate> Predicates = [IsThumb1Only];
@@ -1030,7 +1032,7 @@ class Thumb1pI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
                string opc, string asm, string cstr, list<dag> pattern>
   : InstThumb<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
   let OutOperandList = oops;
-  let InOperandList = !con(iops, (ops pred:$p));
+  let InOperandList = !con(iops, (ins pred:$p));
   let AsmString = !strconcat(opc, !strconcat("${p}", asm));
   let Pattern = pattern;
   list<Predicate> Predicates = [IsThumb1Only];
@@ -1109,7 +1111,7 @@ class Thumb2I<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
               string opc, string asm, string cstr, list<dag> pattern>
   : InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
   let OutOperandList = oops;
-  let InOperandList = !con(iops, (ops pred:$p));
+  let InOperandList = !con(iops, (ins pred:$p));
   let AsmString = !strconcat(opc, !strconcat("${p}", asm));
   let Pattern = pattern;
   list<Predicate> Predicates = [IsThumb2];
@@ -1125,7 +1127,7 @@ class Thumb2sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
                string opc, string asm, string cstr, list<dag> pattern>
   : InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
   let OutOperandList = oops;
-  let InOperandList = !con(iops, (ops pred:$p, cc_out:$s));
+  let InOperandList = !con(iops, (ins pred:$p, cc_out:$s));
   let AsmString   = !strconcat(opc, !strconcat("${s}${p}", asm));
   let Pattern = pattern;
   list<Predicate> Predicates = [IsThumb2];
@@ -1209,7 +1211,7 @@ class T2Iidxldst<bit signed, bits<2> opcod, bit load, bit pre,
                  string opc, string asm, string cstr, list<dag> pattern>
   : InstARM<am, Size4Bytes, im, ThumbFrm, GenericDomain, cstr, itin> {
   let OutOperandList = oops;
-  let InOperandList = !con(iops, (ops pred:$p));
+  let InOperandList = !con(iops, (ins pred:$p));
   let AsmString = !strconcat(opc, !strconcat("${p}", asm));
   let Pattern = pattern;
   list<Predicate> Predicates = [IsThumb2];
@@ -1265,7 +1267,7 @@ class VFPI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
            string opc, string asm, string cstr, list<dag> pattern>
   : InstARM<am, sz, im, f, VFPDomain, cstr, itin> {
   let OutOperandList = oops;
-  let InOperandList = !con(iops, (ops pred:$p));
+  let InOperandList = !con(iops, (ins pred:$p));
   let AsmString   = !strconcat(opc, !strconcat("${p}", asm));
   let Pattern = pattern;
   list<Predicate> Predicates = [HasVFP2];
@@ -1464,11 +1466,12 @@ class AVConv5I<bits<8> opcod1, bits<4> opcod2, dag oops, dag iops,
 // ARM NEON Instruction templates.
 //
 
-class NeonI<dag oops, dag iops, AddrMode am, IndexMode im, InstrItinClass itin,
-            string opc, string dt, string asm, string cstr, list<dag> pattern>
-  : InstARM<am, Size4Bytes, im, NEONFrm, NeonDomain, cstr, itin> {
+class NeonI<dag oops, dag iops, AddrMode am, IndexMode im, Format f,
+            InstrItinClass itin, string opc, string dt, string asm, string cstr,
+            list<dag> pattern>
+  : InstARM<am, Size4Bytes, im, f, NeonDomain, cstr, itin> {
   let OutOperandList = oops;
-  let InOperandList = !con(iops, (ops pred:$p));
+  let InOperandList = !con(iops, (ins pred:$p));
   let AsmString = !strconcat(
                      !strconcat(!strconcat(opc, "${p}"), !strconcat(".", dt)),
                      !strconcat("\t", asm));
@@ -1481,7 +1484,7 @@ class NeonXI<dag oops, dag iops, AddrMode am, IndexMode im, InstrItinClass itin,
             string opc, string asm, string cstr, list<dag> pattern>
   : InstARM<am, Size4Bytes, im, NEONFrm, NeonDomain, cstr, itin> {
   let OutOperandList = oops;
-  let InOperandList = !con(iops, (ops pred:$p));
+  let InOperandList = !con(iops, (ins pred:$p));
   let AsmString = !strconcat(!strconcat(opc, "${p}"), !strconcat("\t", asm));
   let Pattern = pattern;
   list<Predicate> Predicates = [HasNEON];
@@ -1502,8 +1505,8 @@ class NI4<dag oops, dag iops, InstrItinClass itin, string opc,
 class NLdSt<bit op23, bits<2> op21_20, bits<4> op11_8, bits<4> op7_4,
             dag oops, dag iops, InstrItinClass itin,
             string opc, string dt, string asm, string cstr, list<dag> pattern>
-  : NeonI<oops, iops, AddrMode6, IndexModeNone, itin, opc, dt, asm, cstr,
-          pattern> {
+  : NeonI<oops, iops, AddrMode6, IndexModeNone, NLdStFrm, itin, opc, dt, asm,
+          cstr, pattern> {
   let Inst{31-24} = 0b11110100;
   let Inst{23} = op23;
   let Inst{21-20} = op21_20;
@@ -1513,7 +1516,7 @@ class NLdSt<bit op23, bits<2> op21_20, bits<4> op11_8, bits<4> op7_4,
 
 class NDataI<dag oops, dag iops, InstrItinClass itin,
              string opc, string dt, string asm, string cstr, list<dag> pattern>
-  : NeonI<oops, iops, AddrModeNone, IndexModeNone, itin, opc, dt, asm,
+  : NeonI<oops, iops, AddrModeNone, IndexModeNone, NEONFrm, itin, opc, dt, asm,
          cstr, pattern> {
   let Inst{31-25} = 0b1111001;
 }
@@ -1621,7 +1624,7 @@ class NVLaneOp<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3,
   let Inst{4} = 1;
 
   let OutOperandList = oops;
-  let InOperandList = !con(iops, (ops pred:$p));
+  let InOperandList = !con(iops, (ins pred:$p));
   let AsmString = !strconcat(
                      !strconcat(!strconcat(opc, "${p}"), !strconcat(".", dt)),
                      !strconcat("\t", asm));
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 3fc37da..26a2806 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -62,11 +62,14 @@ def ARMcallseq_end   : SDNode<"ISD::CALLSEQ_END",   SDT_ARMCallSeqEnd,
                               [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
 
 def ARMcall          : SDNode<"ARMISD::CALL", SDT_ARMcall,
-                              [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+                              [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag,
+                               SDNPVariadic]>;
 def ARMcall_pred    : SDNode<"ARMISD::CALL_PRED", SDT_ARMcall,
-                              [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+                              [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag,
+                               SDNPVariadic]>;
 def ARMcall_nolink   : SDNode<"ARMISD::CALL_NOLINK", SDT_ARMcall,
-                              [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+                              [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag,
+                               SDNPVariadic]>;
 
 def ARMretflag       : SDNode<"ARMISD::RET_FLAG", SDTNone,
                               [SDNPHasChain, SDNPOptInFlag]>;
@@ -282,7 +285,7 @@ def pclabel : Operand<i32> {
 
 // shifter_operand operands: so_reg and so_imm.
 def so_reg : Operand<i32>,    // reg reg imm
-            ComplexPattern<i32, 3, "SelectShifterOperandReg",
+             ComplexPattern<i32, 3, "SelectShifterOperandReg",
                             [shl,srl,sra,rotr]> {
   let PrintMethod = "printSORegOperand";
   let MIOperandInfo = (ops GPR, GPR, i32imm);
@@ -392,9 +395,14 @@ def addrmode5 : Operand<i32>,
 // addrmode6 := reg with optional writeback
 //
 def addrmode6 : Operand<i32>,
-                ComplexPattern<i32, 4, "SelectAddrMode6", []> {
+                ComplexPattern<i32, 2, "SelectAddrMode6", []> {
   let PrintMethod = "printAddrMode6Operand";
-  let MIOperandInfo = (ops GPR:$addr, GPR:$upd, i32imm, i32imm);
+  let MIOperandInfo = (ops GPR:$addr, i32imm);
+}
+
+def am6offset : Operand<i32> {
+  let PrintMethod = "printAddrMode6OffsetOperand";
+  let MIOperandInfo = (ops GPR);
 }
 
 // addrmodepc := pc + reg
@@ -909,7 +917,7 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1,
   def LDM_RET : AXI4ld<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
                                         reglist:$dsts, variable_ops),
                        IndexModeUpd, LdStMulFrm, IIC_Br,
-                       "ldm${addr:submode}${p}\t$addr, $dsts",
+                       "ldm${addr:submode}${p}\t$addr!, $dsts",
                        "$addr.addr = $wb", []>;
 
 // On non-Darwin platforms R9 is callee-saved.
@@ -1354,7 +1362,7 @@ def LDM : AXI4ld<(outs), (ins addrmode4:$addr, pred:$p,
 def LDM_UPD : AXI4ld<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
                                       reglist:$dsts, variable_ops),
                      IndexModeUpd, LdStMulFrm, IIC_iLoadm,
-                     "ldm${addr:submode}${p}\t$addr, $dsts",
+                     "ldm${addr:submode}${p}\t$addr!, $dsts",
                      "$addr.addr = $wb", []>;
 } // mayLoad, hasExtraDefRegAllocReq
 
@@ -1367,7 +1375,7 @@ def STM : AXI4st<(outs), (ins addrmode4:$addr, pred:$p,
 def STM_UPD : AXI4st<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
                                       reglist:$srcs, variable_ops),
                      IndexModeUpd, LdStMulFrm, IIC_iStorem,
-                     "stm${addr:submode}${p}\t$addr, $srcs",
+                     "stm${addr:submode}${p}\t$addr!, $srcs",
                      "$addr.addr = $wb", []>;
 } // mayStore, hasExtraSrcRegAllocReq
 
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index 8fee6fa..c977cc3 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -138,214 +138,360 @@ def VSTRQ : NI4<(outs), (ins QPR:$src, addrmode4:$addr), IIC_fpStorem,
 }
 
 //   VLD1     : Vector Load (multiple single elements)
-class VLD1D<bits<4> op7_4, string OpcodeStr, string Dt,
-            ValueType Ty, Intrinsic IntOp>
+class VLD1D<bits<4> op7_4, string Dt, ValueType Ty>
   : NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$dst), (ins addrmode6:$addr), IIC_VLD1,
-          OpcodeStr, Dt, "\\{$dst\\}, $addr", "",
-          [(set DPR:$dst, (Ty (IntOp addrmode6:$addr)))]>;
-class VLD1Q<bits<4> op7_4, string OpcodeStr, string Dt,
-            ValueType Ty, Intrinsic IntOp>
+          "vld1", Dt, "\\{$dst\\}, $addr", "",
+          [(set DPR:$dst, (Ty (int_arm_neon_vld1 addrmode6:$addr)))]>;
+class VLD1Q<bits<4> op7_4, string Dt, ValueType Ty>
   : NLdSt<0,0b10,0b1010,op7_4, (outs QPR:$dst), (ins addrmode6:$addr), IIC_VLD1,
-          OpcodeStr, Dt, "${dst:dregpair}, $addr", "",
-          [(set QPR:$dst, (Ty (IntOp addrmode6:$addr)))]>;
+          "vld1", Dt, "${dst:dregpair}, $addr", "",
+          [(set QPR:$dst, (Ty (int_arm_neon_vld1 addrmode6:$addr)))]>;
+
+def  VLD1d8   : VLD1D<0b0000, "8",  v8i8>;
+def  VLD1d16  : VLD1D<0b0100, "16", v4i16>;
+def  VLD1d32  : VLD1D<0b1000, "32", v2i32>;
+def  VLD1df   : VLD1D<0b1000, "32", v2f32>;
+def  VLD1d64  : VLD1D<0b1100, "64", v1i64>;
+
+def  VLD1q8   : VLD1Q<0b0000, "8",  v16i8>;
+def  VLD1q16  : VLD1Q<0b0100, "16", v8i16>;
+def  VLD1q32  : VLD1Q<0b1000, "32", v4i32>;
+def  VLD1qf   : VLD1Q<0b1000, "32", v4f32>;
+def  VLD1q64  : VLD1Q<0b1100, "64", v2i64>;
+
+let mayLoad = 1 in {
+
+// ...with address register writeback:
+class VLD1DWB<bits<4> op7_4, string Dt>
+  : NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$dst, GPR:$wb),
+          (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1,
+          "vld1", Dt, "\\{$dst\\}, $addr$offset",
+          "$addr.addr = $wb", []>;
+class VLD1QWB<bits<4> op7_4, string Dt>
+  : NLdSt<0,0b10,0b1010,op7_4, (outs QPR:$dst, GPR:$wb),
+          (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1,
+          "vld1", Dt, "${dst:dregpair}, $addr$offset",
+          "$addr.addr = $wb", []>;
 
-def  VLD1d8   : VLD1D<0b0000, "vld1", "8",  v8i8,  int_arm_neon_vld1>;
-def  VLD1d16  : VLD1D<0b0100, "vld1", "16", v4i16, int_arm_neon_vld1>;
-def  VLD1d32  : VLD1D<0b1000, "vld1", "32", v2i32, int_arm_neon_vld1>;
-def  VLD1df   : VLD1D<0b1000, "vld1", "32", v2f32, int_arm_neon_vld1>;
-def  VLD1d64  : VLD1D<0b1100, "vld1", "64", v1i64, int_arm_neon_vld1>;
+def VLD1d8_UPD  : VLD1DWB<0b0000, "8">;
+def VLD1d16_UPD : VLD1DWB<0b0100, "16">;
+def VLD1d32_UPD : VLD1DWB<0b1000, "32">;
+def VLD1d64_UPD : VLD1DWB<0b1100, "64">;
 
-def  VLD1q8   : VLD1Q<0b0000, "vld1", "8",  v16i8, int_arm_neon_vld1>;
-def  VLD1q16  : VLD1Q<0b0100, "vld1", "16", v8i16, int_arm_neon_vld1>;
-def  VLD1q32  : VLD1Q<0b1000, "vld1", "32", v4i32, int_arm_neon_vld1>;
-def  VLD1qf   : VLD1Q<0b1000, "vld1", "32", v4f32, int_arm_neon_vld1>;
-def  VLD1q64  : VLD1Q<0b1100, "vld1", "64", v2i64, int_arm_neon_vld1>;
+def VLD1q8_UPD  : VLD1QWB<0b0000, "8">;
+def VLD1q16_UPD : VLD1QWB<0b0100, "16">;
+def VLD1q32_UPD : VLD1QWB<0b1000, "32">;
+def VLD1q64_UPD : VLD1QWB<0b1100, "64">;
+} // mayLoad = 1
+
+let mayLoad = 1, hasExtraDefRegAllocReq = 1 in {
 
 // These (dreg triple/quadruple) are for disassembly only.
-class VLD1D3<bits<4> op7_4, string OpcodeStr, string Dt>
-  : NLdSt<0, 0b10, 0b0110, op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3),
-          (ins addrmode6:$addr), IIC_VLD1, OpcodeStr, Dt,
+class VLD1D3<bits<4> op7_4, string Dt>
+  : NLdSt<0,0b10,0b0110,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3),
+          (ins addrmode6:$addr), IIC_VLD1, "vld1", Dt,
           "\\{$dst1, $dst2, $dst3\\}, $addr", "",
           [/* For disassembly only; pattern left blank */]>;
-class VLD1D4<bits<4> op7_4, string OpcodeStr, string Dt>
+class VLD1D4<bits<4> op7_4, string Dt>
   : NLdSt<0,0b10,0b0010,op7_4,(outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
-          (ins addrmode6:$addr), IIC_VLD1, OpcodeStr, Dt,
+          (ins addrmode6:$addr), IIC_VLD1, "vld1", Dt,
           "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr", "",
           [/* For disassembly only; pattern left blank */]>;
 
-def  VLD1d8T  : VLD1D3<0b0000, "vld1", "8">;
-def  VLD1d16T : VLD1D3<0b0100, "vld1", "16">;
-def  VLD1d32T : VLD1D3<0b1000, "vld1", "32">;
-//def  VLD1d64T : VLD1D3<0b1100, "vld1", "64">;
-
-def  VLD1d8Q  : VLD1D4<0b0000, "vld1", "8">;
-def  VLD1d16Q : VLD1D4<0b0100, "vld1", "16">;
-def  VLD1d32Q : VLD1D4<0b1000, "vld1", "32">;
-//def  VLD1d64Q : VLD1D4<0b1100, "vld1", "64">;
+def  VLD1d8T  : VLD1D3<0b0000, "8">;
+def  VLD1d16T : VLD1D3<0b0100, "16">;
+def  VLD1d32T : VLD1D3<0b1000, "32">;
+//   VLD1d64T : implemented as VLD3d64
+
+def  VLD1d8Q  : VLD1D4<0b0000, "8">;
+def  VLD1d16Q : VLD1D4<0b0100, "16">;
+def  VLD1d32Q : VLD1D4<0b1000, "32">;
+//   VLD1d64Q : implemented as VLD4d64
+
+// ...with address register writeback:
+class VLD1D3WB<bits<4> op7_4, string Dt>
+  : NLdSt<0,0b10,0b0110,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb),
+          (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1, "vld1", Dt,
+          "\\{$dst1, $dst2, $dst3\\}, $addr$offset", "$addr.addr = $wb",
+          [/* For disassembly only; pattern left blank */]>;
+class VLD1D4WB<bits<4> op7_4, string Dt>
+  : NLdSt<0,0b10,0b0010,op7_4,
+          (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
+          (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1, "vld1", Dt,
+          "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr$offset", "$addr.addr = $wb",
+          [/* For disassembly only; pattern left blank */]>;
 
+def VLD1d8T_UPD  : VLD1D3WB<0b0000, "8">;
+def VLD1d16T_UPD : VLD1D3WB<0b0100, "16">;
+def VLD1d32T_UPD : VLD1D3WB<0b1000, "32">;
+//  VLD1d64T_UPD : implemented as VLD3d64_UPD
 
-let mayLoad = 1, hasExtraDefRegAllocReq = 1 in {
+def VLD1d8Q_UPD  : VLD1D4WB<0b0000, "8">;
+def VLD1d16Q_UPD : VLD1D4WB<0b0100, "16">;
+def VLD1d32Q_UPD : VLD1D4WB<0b1000, "32">;
+//  VLD1d64Q_UPD : implemented as VLD4d64_UPD
 
 //   VLD2     : Vector Load (multiple 2-element structures)
-class VLD2D<bits<4> op7_4, string OpcodeStr, string Dt>
-  : NLdSt<0,0b10,0b1000,op7_4, (outs DPR:$dst1, DPR:$dst2),
+class VLD2D<bits<4> op11_8, bits<4> op7_4, string Dt>
+  : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2),
           (ins addrmode6:$addr), IIC_VLD2,
-          OpcodeStr, Dt, "\\{$dst1, $dst2\\}, $addr", "", []>;
-class VLD2Q<bits<4> op7_4, string OpcodeStr, string Dt>
-  : NLdSt<0,0b10,0b0011,op7_4,
+          "vld2", Dt, "\\{$dst1, $dst2\\}, $addr", "", []>;
+class VLD2Q<bits<4> op7_4, string Dt>
+  : NLdSt<0, 0b10, 0b0011, op7_4,
           (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
           (ins addrmode6:$addr), IIC_VLD2,
-          OpcodeStr, Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr",
-          "", []>;
+          "vld2", Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr", "", []>;
 
-def  VLD2d8   : VLD2D<0b0000, "vld2", "8">;
-def  VLD2d16  : VLD2D<0b0100, "vld2", "16">;
-def  VLD2d32  : VLD2D<0b1000, "vld2", "32">;
+def  VLD2d8   : VLD2D<0b1000, 0b0000, "8">;
+def  VLD2d16  : VLD2D<0b1000, 0b0100, "16">;
+def  VLD2d32  : VLD2D<0b1000, 0b1000, "32">;
 def  VLD2d64  : NLdSt<0,0b10,0b1010,0b1100, (outs DPR:$dst1, DPR:$dst2),
                       (ins addrmode6:$addr), IIC_VLD1,
                       "vld1", "64", "\\{$dst1, $dst2\\}, $addr", "", []>;
 
-def  VLD2q8   : VLD2Q<0b0000, "vld2", "8">;
-def  VLD2q16  : VLD2Q<0b0100, "vld2", "16">;
-def  VLD2q32  : VLD2Q<0b1000, "vld2", "32">;
+def  VLD2q8   : VLD2Q<0b0000, "8">;
+def  VLD2q16  : VLD2Q<0b0100, "16">;
+def  VLD2q32  : VLD2Q<0b1000, "32">;
 
-// These (double-spaced dreg pair) are for disassembly only.
-class VLD2Ddbl<bits<4> op7_4, string OpcodeStr, string Dt>
-  : NLdSt<0,0b10,0b1001,op7_4, (outs DPR:$dst1, DPR:$dst2),
-          (ins addrmode6:$addr), IIC_VLD2,
-          OpcodeStr, Dt, "\\{$dst1, $dst2\\}, $addr", "", []>;
+// ...with address register writeback:
+class VLD2DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+  : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2, GPR:$wb),
+          (ins addrmode6:$addr, am6offset:$offset), IIC_VLD2,
+          "vld2", Dt, "\\{$dst1, $dst2\\}, $addr$offset",
+          "$addr.addr = $wb", []>;
+class VLD2QWB<bits<4> op7_4, string Dt>
+  : NLdSt<0, 0b10, 0b0011, op7_4,
+          (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
+          (ins addrmode6:$addr, am6offset:$offset), IIC_VLD2,
+          "vld2", Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr$offset",
+          "$addr.addr = $wb", []>;
 
-def  VLD2d8D  : VLD2Ddbl<0b0000, "vld2", "8">;
-def  VLD2d16D : VLD2Ddbl<0b0100, "vld2", "16">;
-def  VLD2d32D : VLD2Ddbl<0b1000, "vld2", "32">;
+def VLD2d8_UPD  : VLD2DWB<0b1000, 0b0000, "8">;
+def VLD2d16_UPD : VLD2DWB<0b1000, 0b0100, "16">;
+def VLD2d32_UPD : VLD2DWB<0b1000, 0b1000, "32">;
+def VLD2d64_UPD : NLdSt<0,0b10,0b1010,0b1100,
+                        (outs DPR:$dst1, DPR:$dst2, GPR:$wb),
+                        (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1,
+                        "vld1", "64", "\\{$dst1, $dst2\\}, $addr$offset",
+                        "$addr.addr = $wb", []>;
+
+def VLD2q8_UPD  : VLD2QWB<0b0000, "8">;
+def VLD2q16_UPD : VLD2QWB<0b0100, "16">;
+def VLD2q32_UPD : VLD2QWB<0b1000, "32">;
+
+// ...with double-spaced registers (for disassembly only):
+def VLD2b8      : VLD2D<0b1001, 0b0000, "8">;
+def VLD2b16     : VLD2D<0b1001, 0b0100, "16">;
+def VLD2b32     : VLD2D<0b1001, 0b1000, "32">;
+def VLD2b8_UPD  : VLD2DWB<0b1001, 0b0000, "8">;
+def VLD2b16_UPD : VLD2DWB<0b1001, 0b0100, "16">;
+def VLD2b32_UPD : VLD2DWB<0b1001, 0b1000, "32">;
 
 //   VLD3     : Vector Load (multiple 3-element structures)
-class VLD3D<bits<4> op7_4, string OpcodeStr, string Dt>
-  : NLdSt<0,0b10,0b0100,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3),
+class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt>
+  : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3),
           (ins addrmode6:$addr), IIC_VLD3,
-          OpcodeStr, Dt, "\\{$dst1, $dst2, $dst3\\}, $addr", "", []>;
-class VLD3WB<bits<4> op7_4, string OpcodeStr, string Dt>
-  : NLdSt<0,0b10,0b0101,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb),
-          (ins addrmode6:$addr), IIC_VLD3,
-          OpcodeStr, Dt, "\\{$dst1, $dst2, $dst3\\}, $addr",
-          "$addr.addr = $wb", []>;
+          "vld3", Dt, "\\{$dst1, $dst2, $dst3\\}, $addr", "", []>;
 
-def  VLD3d8   : VLD3D<0b0000, "vld3", "8">;
-def  VLD3d16  : VLD3D<0b0100, "vld3", "16">;
-def  VLD3d32  : VLD3D<0b1000, "vld3", "32">;
+def  VLD3d8   : VLD3D<0b0100, 0b0000, "8">;
+def  VLD3d16  : VLD3D<0b0100, 0b0100, "16">;
+def  VLD3d32  : VLD3D<0b0100, 0b1000, "32">;
 def  VLD3d64  : NLdSt<0,0b10,0b0110,0b1100,
                       (outs DPR:$dst1, DPR:$dst2, DPR:$dst3),
                       (ins addrmode6:$addr), IIC_VLD1,
                       "vld1", "64", "\\{$dst1, $dst2, $dst3\\}, $addr", "", []>;
 
-// vld3 to double-spaced even registers.
-def  VLD3q8a  : VLD3WB<0b0000, "vld3", "8">;
-def  VLD3q16a : VLD3WB<0b0100, "vld3", "16">;
-def  VLD3q32a : VLD3WB<0b1000, "vld3", "32">;
+// ...with address register writeback:
+class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+  : NLdSt<0, 0b10, op11_8, op7_4,
+          (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb),
+          (ins addrmode6:$addr, am6offset:$offset), IIC_VLD3,
+          "vld3", Dt, "\\{$dst1, $dst2, $dst3\\}, $addr$offset",
+          "$addr.addr = $wb", []>;
 
-// vld3 to double-spaced odd registers.
-def  VLD3q8b  : VLD3WB<0b0000, "vld3", "8">;
-def  VLD3q16b : VLD3WB<0b0100, "vld3", "16">;
-def  VLD3q32b : VLD3WB<0b1000, "vld3", "32">;
+def VLD3d8_UPD  : VLD3DWB<0b0100, 0b0000, "8">;
+def VLD3d16_UPD : VLD3DWB<0b0100, 0b0100, "16">;
+def VLD3d32_UPD : VLD3DWB<0b0100, 0b1000, "32">;
+def VLD3d64_UPD : NLdSt<0,0b10,0b0110,0b1100,
+                        (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb),
+                        (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1,
+                        "vld1", "64", "\\{$dst1, $dst2, $dst3\\}, $addr$offset",
+                        "$addr.addr = $wb", []>;
+
+// ...with double-spaced registers (non-updating versions for disassembly only):
+def VLD3q8      : VLD3D<0b0101, 0b0000, "8">;
+def VLD3q16     : VLD3D<0b0101, 0b0100, "16">;
+def VLD3q32     : VLD3D<0b0101, 0b1000, "32">;
+def VLD3q8_UPD  : VLD3DWB<0b0101, 0b0000, "8">;
+def VLD3q16_UPD : VLD3DWB<0b0101, 0b0100, "16">;
+def VLD3q32_UPD : VLD3DWB<0b0101, 0b1000, "32">;
+
+// ...alternate versions to be allocated odd register numbers:
+def VLD3q8odd_UPD  : VLD3DWB<0b0101, 0b0000, "8">;
+def VLD3q16odd_UPD : VLD3DWB<0b0101, 0b0100, "16">;
+def VLD3q32odd_UPD : VLD3DWB<0b0101, 0b1000, "32">;
 
 //   VLD4     : Vector Load (multiple 4-element structures)
-class VLD4D<bits<4> op7_4, string OpcodeStr, string Dt>
-  : NLdSt<0,0b10,0b0000,op7_4,
+class VLD4D<bits<4> op11_8, bits<4> op7_4, string Dt>
+  : NLdSt<0, 0b10, op11_8, op7_4,
           (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
           (ins addrmode6:$addr), IIC_VLD4,
-          OpcodeStr, Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr",
-          "", []>;
-class VLD4WB<bits<4> op7_4, string OpcodeStr, string Dt>
-  : NLdSt<0,0b10,0b0001,op7_4,
-          (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
-          (ins addrmode6:$addr), IIC_VLD4,
-          OpcodeStr, Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr",
-          "$addr.addr = $wb", []>;
+          "vld4", Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr", "", []>;
 
-def  VLD4d8   : VLD4D<0b0000, "vld4", "8">;
-def  VLD4d16  : VLD4D<0b0100, "vld4", "16">;
-def  VLD4d32  : VLD4D<0b1000, "vld4", "32">;
+def  VLD4d8   : VLD4D<0b0000, 0b0000, "8">;
+def  VLD4d16  : VLD4D<0b0000, 0b0100, "16">;
+def  VLD4d32  : VLD4D<0b0000, 0b1000, "32">;
 def  VLD4d64  : NLdSt<0,0b10,0b0010,0b1100,
                       (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
                       (ins addrmode6:$addr), IIC_VLD1,
                       "vld1", "64", "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr",
                       "", []>;
 
-// vld4 to double-spaced even registers.
-def  VLD4q8a  : VLD4WB<0b0000, "vld4", "8">;
-def  VLD4q16a : VLD4WB<0b0100, "vld4", "16">;
-def  VLD4q32a : VLD4WB<0b1000, "vld4", "32">;
+// ...with address register writeback:
+class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+  : NLdSt<0, 0b10, op11_8, op7_4,
+          (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
+          (ins addrmode6:$addr, am6offset:$offset), IIC_VLD4,
+          "vld4", Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr$offset",
+          "$addr.addr = $wb", []>;
 
-// vld4 to double-spaced odd registers.
-def  VLD4q8b  : VLD4WB<0b0000, "vld4", "8">;
-def  VLD4q16b : VLD4WB<0b0100, "vld4", "16">;
-def  VLD4q32b : VLD4WB<0b1000, "vld4", "32">;
+def VLD4d8_UPD  : VLD4DWB<0b0000, 0b0000, "8">;
+def VLD4d16_UPD : VLD4DWB<0b0000, 0b0100, "16">;
+def VLD4d32_UPD : VLD4DWB<0b0000, 0b1000, "32">;
+def VLD4d64_UPD : NLdSt<0,0b10,0b0010,0b1100,
+                        (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4,
+                         GPR:$wb),
+                        (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1,
+                        "vld1", "64",
+                        "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr$offset",
+                        "$addr.addr = $wb", []>;
+
+// ...with double-spaced registers (non-updating versions for disassembly only):
+def VLD4q8      : VLD4D<0b0001, 0b0000, "8">;
+def VLD4q16     : VLD4D<0b0001, 0b0100, "16">;
+def VLD4q32     : VLD4D<0b0001, 0b1000, "32">;
+def VLD4q8_UPD  : VLD4DWB<0b0001, 0b0000, "8">;
+def VLD4q16_UPD : VLD4DWB<0b0001, 0b0100, "16">;
+def VLD4q32_UPD : VLD4DWB<0b0001, 0b1000, "32">;
+
+// ...alternate versions to be allocated odd register numbers:
+def VLD4q8odd_UPD  : VLD4DWB<0b0001, 0b0000, "8">;
+def VLD4q16odd_UPD : VLD4DWB<0b0001, 0b0100, "16">;
+def VLD4q32odd_UPD : VLD4DWB<0b0001, 0b1000, "32">;
 
 //   VLD1LN   : Vector Load (single element to one lane)
 //   FIXME: Not yet implemented.
 
 //   VLD2LN   : Vector Load (single 2-element structure to one lane)
-class VLD2LN<bits<4> op11_8, string OpcodeStr, string Dt>
-  : NLdSt<1,0b10,op11_8,{?,?,?,?}, (outs DPR:$dst1, DPR:$dst2),
-            (ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane),
-            IIC_VLD2, OpcodeStr, Dt, "\\{$dst1[$lane], $dst2[$lane]\\}, $addr",
-            "$src1 = $dst1, $src2 = $dst2", []>;
+class VLD2LN<bits<4> op11_8, string Dt>
+  : NLdSt<1, 0b10, op11_8, {?,?,?,?}, (outs DPR:$dst1, DPR:$dst2),
+          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane),
+          IIC_VLD2, "vld2", Dt, "\\{$dst1[$lane], $dst2[$lane]\\}, $addr",
+          "$src1 = $dst1, $src2 = $dst2", []>;
+
+def VLD2LNd8  : VLD2LN<0b0001, "8">;
+def VLD2LNd16 : VLD2LN<0b0101, "16"> { let Inst{5} = 0; }
+def VLD2LNd32 : VLD2LN<0b1001, "32"> { let Inst{6} = 0; }
 
-// vld2 to single-spaced registers.
-def VLD2LNd8  : VLD2LN<0b0001, "vld2", "8">;
-def VLD2LNd16 : VLD2LN<0b0101, "vld2", "16"> { let Inst{5} = 0; }
-def VLD2LNd32 : VLD2LN<0b1001, "vld2", "32"> { let Inst{6} = 0; }
+// ...with double-spaced registers:
+def VLD2LNq16 : VLD2LN<0b0101, "16"> { let Inst{5} = 1; }
+def VLD2LNq32 : VLD2LN<0b1001, "32"> { let Inst{6} = 1; }
 
-// vld2 to double-spaced even registers.
-def VLD2LNq16a: VLD2LN<0b0101, "vld2", "16"> { let Inst{5} = 1; }
-def VLD2LNq32a: VLD2LN<0b1001, "vld2", "32"> { let Inst{6} = 1; }
+// ...alternate versions to be allocated odd register numbers:
+def VLD2LNq16odd : VLD2LN<0b0101, "16"> { let Inst{5} = 1; }
+def VLD2LNq32odd : VLD2LN<0b1001, "32"> { let Inst{6} = 1; }
 
-// vld2 to double-spaced odd registers.
-def VLD2LNq16b: VLD2LN<0b0101, "vld2", "16"> { let Inst{5} = 1; }
-def VLD2LNq32b: VLD2LN<0b1001, "vld2", "32"> { let Inst{6} = 1; }
+// ...with address register writeback:
+class VLD2LNWB<bits<4> op11_8, string Dt>
+  : NLdSt<1, 0b10, op11_8, {?,?,?,?}, (outs DPR:$dst1, DPR:$dst2, GPR:$wb),
+          (ins addrmode6:$addr, am6offset:$offset,
+           DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VLD2, "vld2", Dt,
+          "\\{$dst1[$lane], $dst2[$lane]\\}, $addr$offset",
+          "$src1 = $dst1, $src2 = $dst2, $addr.addr = $wb", []>;
+
+def VLD2LNd8_UPD  : VLD2LNWB<0b0001, "8">;
+def VLD2LNd16_UPD : VLD2LNWB<0b0101, "16"> { let Inst{5} = 0; }
+def VLD2LNd32_UPD : VLD2LNWB<0b1001, "32"> { let Inst{6} = 0; }
+
+def VLD2LNq16_UPD : VLD2LNWB<0b0101, "16"> { let Inst{5} = 1; }
+def VLD2LNq32_UPD : VLD2LNWB<0b1001, "32"> { let Inst{6} = 1; }
 
 //   VLD3LN   : Vector Load (single 3-element structure to one lane)
-class VLD3LN<bits<4> op11_8, string OpcodeStr, string Dt>
-  : NLdSt<1,0b10,op11_8,{?,?,?,?}, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3),
-            (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3,
-            nohash_imm:$lane), IIC_VLD3, OpcodeStr, Dt,
-            "\\{$dst1[$lane], $dst2[$lane], $dst3[$lane]\\}, $addr",
-            "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3", []>;
-
-// vld3 to single-spaced registers.
-def VLD3LNd8  : VLD3LN<0b0010, "vld3", "8"> { let Inst{4} = 0; }
-def VLD3LNd16 : VLD3LN<0b0110, "vld3", "16"> { let Inst{5-4} = 0b00; }
-def VLD3LNd32 : VLD3LN<0b1010, "vld3", "32"> { let Inst{6-4} = 0b000; }
-
-// vld3 to double-spaced even registers.
-def VLD3LNq16a: VLD3LN<0b0110, "vld3", "16"> { let Inst{5-4} = 0b10; }
-def VLD3LNq32a: VLD3LN<0b1010, "vld3", "32"> { let Inst{6-4} = 0b100; }
-
-// vld3 to double-spaced odd registers.
-def VLD3LNq16b: VLD3LN<0b0110, "vld3", "16"> { let Inst{5-4} = 0b10; }
-def VLD3LNq32b: VLD3LN<0b1010, "vld3", "32"> { let Inst{6-4} = 0b100; }
+class VLD3LN<bits<4> op11_8, string Dt>
+  : NLdSt<1, 0b10, op11_8, {?,?,?,?}, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3),
+          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3,
+          nohash_imm:$lane), IIC_VLD3, "vld3", Dt,
+          "\\{$dst1[$lane], $dst2[$lane], $dst3[$lane]\\}, $addr",
+          "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3", []>;
+
+def VLD3LNd8  : VLD3LN<0b0010, "8"> { let Inst{4} = 0; }
+def VLD3LNd16 : VLD3LN<0b0110, "16"> { let Inst{5-4} = 0b00; }
+def VLD3LNd32 : VLD3LN<0b1010, "32"> { let Inst{6-4} = 0b000; }
+
+// ...with double-spaced registers:
+def VLD3LNq16 : VLD3LN<0b0110, "16"> { let Inst{5-4} = 0b10; }
+def VLD3LNq32 : VLD3LN<0b1010, "32"> { let Inst{6-4} = 0b100; }
+
+// ...alternate versions to be allocated odd register numbers:
+def VLD3LNq16odd : VLD3LN<0b0110, "16"> { let Inst{5-4} = 0b10; }
+def VLD3LNq32odd : VLD3LN<0b1010, "32"> { let Inst{6-4} = 0b100; }
+
+// ...with address register writeback:
+class VLD3LNWB<bits<4> op11_8, string Dt>
+  : NLdSt<1, 0b10, op11_8, {?,?,?,?},
+          (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb),
+          (ins addrmode6:$addr, am6offset:$offset,
+           DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane),
+          IIC_VLD3, "vld3", Dt,
+          "\\{$dst1[$lane], $dst2[$lane], $dst3[$lane]\\}, $addr$offset",
+          "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $addr.addr = $wb",
+          []>;
+
+def VLD3LNd8_UPD  : VLD3LNWB<0b0010, "8"> { let Inst{4} = 0; }
+def VLD3LNd16_UPD : VLD3LNWB<0b0110, "16"> { let Inst{5-4} = 0b00; }
+def VLD3LNd32_UPD : VLD3LNWB<0b1010, "32"> { let Inst{6-4} = 0b000; }
+
+def VLD3LNq16_UPD : VLD3LNWB<0b0110, "16"> { let Inst{5-4} = 0b10; }
+def VLD3LNq32_UPD : VLD3LNWB<0b1010, "32"> { let Inst{6-4} = 0b100; }
 
 //   VLD4LN   : Vector Load (single 4-element structure to one lane)
-class VLD4LN<bits<4> op11_8, string OpcodeStr, string Dt>
-  : NLdSt<1,0b10,op11_8,{?,?,?,?},
-            (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
-            (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
-            nohash_imm:$lane), IIC_VLD4, OpcodeStr, Dt,
+class VLD4LN<bits<4> op11_8, string Dt>
+  : NLdSt<1, 0b10, op11_8, {?,?,?,?},
+          (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
+          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
+          nohash_imm:$lane), IIC_VLD4, "vld4", Dt,
           "\\{$dst1[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $addr",
-            "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []>;
+          "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []>;
 
-// vld4 to single-spaced registers.
-def VLD4LNd8  : VLD4LN<0b0011, "vld4", "8">;
-def VLD4LNd16 : VLD4LN<0b0111, "vld4", "16"> { let Inst{5} = 0; }
-def VLD4LNd32 : VLD4LN<0b1011, "vld4", "32"> { let Inst{6} = 0; }
+def VLD4LNd8  : VLD4LN<0b0011, "8">;
+def VLD4LNd16 : VLD4LN<0b0111, "16"> { let Inst{5} = 0; }
+def VLD4LNd32 : VLD4LN<0b1011, "32"> { let Inst{6} = 0; }
 
-// vld4 to double-spaced even registers.
-def VLD4LNq16a: VLD4LN<0b0111, "vld4", "16"> { let Inst{5} = 1; }
-def VLD4LNq32a: VLD4LN<0b1011, "vld4", "32"> { let Inst{6} = 1; }
+// ...with double-spaced registers:
+def VLD4LNq16 : VLD4LN<0b0111, "16"> { let Inst{5} = 1; }
+def VLD4LNq32 : VLD4LN<0b1011, "32"> { let Inst{6} = 1; }
 
-// vld4 to double-spaced odd registers.
-def VLD4LNq16b: VLD4LN<0b0111, "vld4", "16"> { let Inst{5} = 1; }
-def VLD4LNq32b: VLD4LN<0b1011, "vld4", "32"> { let Inst{6} = 1; }
+// ...alternate versions to be allocated odd register numbers:
+def VLD4LNq16odd : VLD4LN<0b0111, "16"> { let Inst{5} = 1; }
+def VLD4LNq32odd : VLD4LN<0b1011, "32"> { let Inst{6} = 1; }
+
+// ...with address register writeback:
+class VLD4LNWB<bits<4> op11_8, string Dt>
+  : NLdSt<1, 0b10, op11_8, {?,?,?,?},
+          (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
+          (ins addrmode6:$addr, am6offset:$offset,
+           DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
+          IIC_VLD4, "vld4", Dt,
+"\\{$dst1[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $addr$offset",
+"$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4, $addr.addr = $wb",
+          []>;
+
+def VLD4LNd8_UPD  : VLD4LNWB<0b0011, "8">;
+def VLD4LNd16_UPD : VLD4LNWB<0b0111, "16"> { let Inst{5} = 0; }
+def VLD4LNd32_UPD : VLD4LNWB<0b1011, "32"> { let Inst{6} = 0; }
+
+def VLD4LNq16_UPD : VLD4LNWB<0b0111, "16"> { let Inst{5} = 1; }
+def VLD4LNq32_UPD : VLD4LNWB<0b1011, "32"> { let Inst{6} = 1; }
 
 //   VLD1DUP  : Vector Load (single element to all lanes)
 //   VLD2DUP  : Vector Load (single 2-element structure to all lanes)
@@ -355,213 +501,353 @@ def VLD4LNq32b: VLD4LN<0b1011, "vld4", "32"> { let Inst{6} = 1; }
 } // mayLoad = 1, hasExtraDefRegAllocReq = 1
 
 //   VST1     : Vector Store (multiple single elements)
-class VST1D<bits<4> op7_4, string OpcodeStr, string Dt,
-            ValueType Ty, Intrinsic IntOp>
+class VST1D<bits<4> op7_4, string Dt, ValueType Ty>
   : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins addrmode6:$addr, DPR:$src), IIC_VST,
-          OpcodeStr, Dt, "\\{$src\\}, $addr", "",
-          [(IntOp addrmode6:$addr, (Ty DPR:$src))]>;
-class VST1Q<bits<4> op7_4, string OpcodeStr, string Dt,
-            ValueType Ty, Intrinsic IntOp>
+          "vst1", Dt, "\\{$src\\}, $addr", "",
+          [(int_arm_neon_vst1 addrmode6:$addr, (Ty DPR:$src))]>;
+class VST1Q<bits<4> op7_4, string Dt, ValueType Ty>
   : NLdSt<0,0b00,0b1010,op7_4, (outs), (ins addrmode6:$addr, QPR:$src), IIC_VST,
-          OpcodeStr, Dt, "${src:dregpair}, $addr", "",
-          [(IntOp addrmode6:$addr, (Ty QPR:$src))]>;
+          "vst1", Dt, "${src:dregpair}, $addr", "",
+          [(int_arm_neon_vst1 addrmode6:$addr, (Ty QPR:$src))]>;
 
 let hasExtraSrcRegAllocReq = 1 in {
-def  VST1d8   : VST1D<0b0000, "vst1", "8",  v8i8,  int_arm_neon_vst1>;
-def  VST1d16  : VST1D<0b0100, "vst1", "16", v4i16, int_arm_neon_vst1>;
-def  VST1d32  : VST1D<0b1000, "vst1", "32", v2i32, int_arm_neon_vst1>;
-def  VST1df   : VST1D<0b1000, "vst1", "32", v2f32, int_arm_neon_vst1>;
-def  VST1d64  : VST1D<0b1100, "vst1", "64", v1i64, int_arm_neon_vst1>;
-
-def  VST1q8   : VST1Q<0b0000, "vst1", "8",  v16i8, int_arm_neon_vst1>;
-def  VST1q16  : VST1Q<0b0100, "vst1", "16", v8i16, int_arm_neon_vst1>;
-def  VST1q32  : VST1Q<0b1000, "vst1", "32", v4i32, int_arm_neon_vst1>;
-def  VST1qf   : VST1Q<0b1000, "vst1", "32", v4f32, int_arm_neon_vst1>;
-def  VST1q64  : VST1Q<0b1100, "vst1", "64", v2i64, int_arm_neon_vst1>;
+def  VST1d8   : VST1D<0b0000, "8",  v8i8>;
+def  VST1d16  : VST1D<0b0100, "16", v4i16>;
+def  VST1d32  : VST1D<0b1000, "32", v2i32>;
+def  VST1df   : VST1D<0b1000, "32", v2f32>;
+def  VST1d64  : VST1D<0b1100, "64", v1i64>;
+
+def  VST1q8   : VST1Q<0b0000, "8",  v16i8>;
+def  VST1q16  : VST1Q<0b0100, "16", v8i16>;
+def  VST1q32  : VST1Q<0b1000, "32", v4i32>;
+def  VST1qf   : VST1Q<0b1000, "32", v4f32>;
+def  VST1q64  : VST1Q<0b1100, "64", v2i64>;
 } // hasExtraSrcRegAllocReq
 
+let mayStore = 1, hasExtraSrcRegAllocReq = 1 in {
+
+// ...with address register writeback:
+class VST1DWB<bits<4> op7_4, string Dt>
+  : NLdSt<0, 0b00, 0b0111, op7_4, (outs GPR:$wb),
+          (ins addrmode6:$addr, am6offset:$offset, DPR:$src), IIC_VST,
+          "vst1", Dt, "\\{$src\\}, $addr$offset", "$addr.addr = $wb", []>;
+class VST1QWB<bits<4> op7_4, string Dt>
+  : NLdSt<0, 0b00, 0b1010, op7_4, (outs GPR:$wb),
+          (ins addrmode6:$addr, am6offset:$offset, QPR:$src), IIC_VST,
+          "vst1", Dt, "${src:dregpair}, $addr$offset", "$addr.addr = $wb", []>;
+
+def VST1d8_UPD  : VST1DWB<0b0000, "8">;
+def VST1d16_UPD : VST1DWB<0b0100, "16">;
+def VST1d32_UPD : VST1DWB<0b1000, "32">;
+def VST1d64_UPD : VST1DWB<0b1100, "64">;
+
+def VST1q8_UPD  : VST1QWB<0b0000, "8">;
+def VST1q16_UPD : VST1QWB<0b0100, "16">;
+def VST1q32_UPD : VST1QWB<0b1000, "32">;
+def VST1q64_UPD : VST1QWB<0b1100, "64">;
+
 // These (dreg triple/quadruple) are for disassembly only.
-class VST1D3<bits<4> op7_4, string OpcodeStr, string Dt>
+class VST1D3<bits<4> op7_4, string Dt>
   : NLdSt<0, 0b00, 0b0110, op7_4, (outs),
-          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST,
-          OpcodeStr, Dt,
-          "\\{$src1, $src2, $src3\\}, $addr", "",
+          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3),
+          IIC_VST, "vst1", Dt, "\\{$src1, $src2, $src3\\}, $addr", "",
           [/* For disassembly only; pattern left blank */]>;
-class VST1D4<bits<4> op7_4, string OpcodeStr, string Dt>
+class VST1D4<bits<4> op7_4, string Dt>
   : NLdSt<0, 0b00, 0b0010, op7_4, (outs),
           (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4),
-          IIC_VST, OpcodeStr, Dt,
-          "\\{$src1, $src2, $src3, $src4\\}, $addr", "",
+          IIC_VST, "vst1", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr", "",
           [/* For disassembly only; pattern left blank */]>;
 
-def  VST1d8T  : VST1D3<0b0000, "vst1", "8">;
-def  VST1d16T : VST1D3<0b0100, "vst1", "16">;
-def  VST1d32T : VST1D3<0b1000, "vst1", "32">;
-//def  VST1d64T : VST1D3<0b1100, "vst1", "64">;
-
-def  VST1d8Q  : VST1D4<0b0000, "vst1", "8">;
-def  VST1d16Q : VST1D4<0b0100, "vst1", "16">;
-def  VST1d32Q : VST1D4<0b1000, "vst1", "32">;
-//def  VST1d64Q : VST1D4<0b1100, "vst1", "64">;
+def  VST1d8T  : VST1D3<0b0000, "8">;
+def  VST1d16T : VST1D3<0b0100, "16">;
+def  VST1d32T : VST1D3<0b1000, "32">;
+//   VST1d64T : implemented as VST3d64
+
+def  VST1d8Q  : VST1D4<0b0000, "8">;
+def  VST1d16Q : VST1D4<0b0100, "16">;
+def  VST1d32Q : VST1D4<0b1000, "32">;
+//   VST1d64Q : implemented as VST4d64
+
+// ...with address register writeback:
+class VST1D3WB<bits<4> op7_4, string Dt>
+  : NLdSt<0, 0b00, 0b0110, op7_4, (outs GPR:$wb),
+          (ins addrmode6:$addr, am6offset:$offset,
+           DPR:$src1, DPR:$src2, DPR:$src3),
+          IIC_VST, "vst1", Dt, "\\{$src1, $src2, $src3\\}, $addr$offset",
+          "$addr.addr = $wb",
+          [/* For disassembly only; pattern left blank */]>;
+class VST1D4WB<bits<4> op7_4, string Dt>
+  : NLdSt<0, 0b00, 0b0010, op7_4, (outs GPR:$wb),
+          (ins addrmode6:$addr, am6offset:$offset,
+           DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4),
+          IIC_VST, "vst1", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr$offset",
+          "$addr.addr = $wb",
+          [/* For disassembly only; pattern left blank */]>;
 
+def VST1d8T_UPD  : VST1D3WB<0b0000, "8">;
+def VST1d16T_UPD : VST1D3WB<0b0100, "16">;
+def VST1d32T_UPD : VST1D3WB<0b1000, "32">;
+//  VST1d64T_UPD : implemented as VST3d64_UPD
 
-let mayStore = 1, hasExtraSrcRegAllocReq = 1 in {
+def VST1d8Q_UPD  : VST1D4WB<0b0000, "8">;
+def VST1d16Q_UPD : VST1D4WB<0b0100, "16">;
+def VST1d32Q_UPD : VST1D4WB<0b1000, "32">;
+//  VST1d64Q_UPD : implemented as VST4d64_UPD
 
 //   VST2     : Vector Store (multiple 2-element structures)
-class VST2D<bits<4> op7_4, string OpcodeStr, string Dt>
-  : NLdSt<0,0b00,0b1000,op7_4, (outs),
-          (ins addrmode6:$addr, DPR:$src1, DPR:$src2), IIC_VST,
-          OpcodeStr, Dt, "\\{$src1, $src2\\}, $addr", "", []>;
-class VST2Q<bits<4> op7_4, string OpcodeStr, string Dt>
-  : NLdSt<0,0b00,0b0011,op7_4, (outs),
+class VST2D<bits<4> op11_8, bits<4> op7_4, string Dt>
+  : NLdSt<0, 0b00, op11_8, op7_4, (outs),
+          (ins addrmode6:$addr, DPR:$src1, DPR:$src2),
+          IIC_VST, "vst2", Dt, "\\{$src1, $src2\\}, $addr", "", []>;
+class VST2Q<bits<4> op7_4, string Dt>
+  : NLdSt<0, 0b00, 0b0011, op7_4, (outs),
           (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4),
-          IIC_VST, OpcodeStr, Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr",
+          IIC_VST, "vst2", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr",
           "", []>;
 
-def  VST2d8   : VST2D<0b0000, "vst2", "8">;
-def  VST2d16  : VST2D<0b0100, "vst2", "16">;
-def  VST2d32  : VST2D<0b1000, "vst2", "32">;
+def  VST2d8   : VST2D<0b1000, 0b0000, "8">;
+def  VST2d16  : VST2D<0b1000, 0b0100, "16">;
+def  VST2d32  : VST2D<0b1000, 0b1000, "32">;
 def  VST2d64  : NLdSt<0,0b00,0b1010,0b1100, (outs),
                       (ins addrmode6:$addr, DPR:$src1, DPR:$src2), IIC_VST,
                       "vst1", "64", "\\{$src1, $src2\\}, $addr", "", []>;
 
-def  VST2q8   : VST2Q<0b0000, "vst2", "8">;
-def  VST2q16  : VST2Q<0b0100, "vst2", "16">;
-def  VST2q32  : VST2Q<0b1000, "vst2", "32">;
+def  VST2q8   : VST2Q<0b0000, "8">;
+def  VST2q16  : VST2Q<0b0100, "16">;
+def  VST2q32  : VST2Q<0b1000, "32">;
 
-// These (double-spaced dreg pair) are for disassembly only.
-class VST2Ddbl<bits<4> op7_4, string OpcodeStr, string Dt>
-  : NLdSt<0, 0b00, 0b1001, op7_4, (outs),
-          (ins addrmode6:$addr, DPR:$src1, DPR:$src2), IIC_VST,
-          OpcodeStr, Dt, "\\{$src1, $src2\\}, $addr", "", []>;
+// ...with address register writeback:
+class VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+  : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
+          (ins addrmode6:$addr, am6offset:$offset, DPR:$src1, DPR:$src2),
+          IIC_VST, "vst2", Dt, "\\{$src1, $src2\\}, $addr$offset",
+          "$addr.addr = $wb", []>;
+class VST2QWB<bits<4> op7_4, string Dt>
+  : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
+          (ins addrmode6:$addr, am6offset:$offset,
+           DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4),
+          IIC_VST, "vst2", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr$offset",
+          "$addr.addr = $wb", []>;
 
-def  VST2d8D  : VST2Ddbl<0b0000, "vst2", "8">;
-def  VST2d16D : VST2Ddbl<0b0100, "vst2", "16">;
-def  VST2d32D : VST2Ddbl<0b1000, "vst2", "32">;
+def VST2d8_UPD  : VST2DWB<0b1000, 0b0000, "8">;
+def VST2d16_UPD : VST2DWB<0b1000, 0b0100, "16">;
+def VST2d32_UPD : VST2DWB<0b1000, 0b1000, "32">;
+def VST2d64_UPD : NLdSt<0,0b00,0b1010,0b1100, (outs GPR:$wb),
+                        (ins addrmode6:$addr, am6offset:$offset,
+                         DPR:$src1, DPR:$src2), IIC_VST,
+                        "vst1", "64", "\\{$src1, $src2\\}, $addr$offset",
+                        "$addr.addr = $wb", []>;
+
+def VST2q8_UPD  : VST2QWB<0b0000, "8">;
+def VST2q16_UPD : VST2QWB<0b0100, "16">;
+def VST2q32_UPD : VST2QWB<0b1000, "32">;
+
+// ...with double-spaced registers (for disassembly only):
+def VST2b8      : VST2D<0b1001, 0b0000, "8">;
+def VST2b16     : VST2D<0b1001, 0b0100, "16">;
+def VST2b32     : VST2D<0b1001, 0b1000, "32">;
+def VST2b8_UPD  : VST2DWB<0b1001, 0b0000, "8">;
+def VST2b16_UPD : VST2DWB<0b1001, 0b0100, "16">;
+def VST2b32_UPD : VST2DWB<0b1001, 0b1000, "32">;
 
 //   VST3     : Vector Store (multiple 3-element structures)
-class VST3D<bits<4> op7_4, string OpcodeStr, string Dt>
-  : NLdSt<0,0b00,0b0100,op7_4, (outs),
+class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt>
+  : NLdSt<0, 0b00, op11_8, op7_4, (outs),
           (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST,
-          OpcodeStr, Dt, "\\{$src1, $src2, $src3\\}, $addr", "", []>;
-class VST3WB<bits<4> op7_4, string OpcodeStr, string Dt>
-  : NLdSt<0,0b00,0b0101,op7_4, (outs GPR:$wb),
-          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST,
-          OpcodeStr, Dt, "\\{$src1, $src2, $src3\\}, $addr",
-          "$addr.addr = $wb", []>;
+          "vst3", Dt, "\\{$src1, $src2, $src3\\}, $addr", "", []>;
 
-def  VST3d8   : VST3D<0b0000, "vst3", "8">;
-def  VST3d16  : VST3D<0b0100, "vst3", "16">;
-def  VST3d32  : VST3D<0b1000, "vst3", "32">;
+def  VST3d8   : VST3D<0b0100, 0b0000, "8">;
+def  VST3d16  : VST3D<0b0100, 0b0100, "16">;
+def  VST3d32  : VST3D<0b0100, 0b1000, "32">;
 def  VST3d64  : NLdSt<0,0b00,0b0110,0b1100, (outs),
                       (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3),
                       IIC_VST,
                       "vst1", "64", "\\{$src1, $src2, $src3\\}, $addr", "", []>;
 
-// vst3 to double-spaced even registers.
-def  VST3q8a  : VST3WB<0b0000, "vst3", "8">;
-def  VST3q16a : VST3WB<0b0100, "vst3", "16">;
-def  VST3q32a : VST3WB<0b1000, "vst3", "32">;
+// ...with address register writeback:
+class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+  : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
+          (ins addrmode6:$addr, am6offset:$offset,
+           DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST,
+          "vst3", Dt, "\\{$src1, $src2, $src3\\}, $addr$offset",
+          "$addr.addr = $wb", []>;
 
-// vst3 to double-spaced odd registers.
-def  VST3q8b  : VST3WB<0b0000, "vst3", "8">;
-def  VST3q16b : VST3WB<0b0100, "vst3", "16">;
-def  VST3q32b : VST3WB<0b1000, "vst3", "32">;
+def VST3d8_UPD  : VST3DWB<0b0100, 0b0000, "8">;
+def VST3d16_UPD : VST3DWB<0b0100, 0b0100, "16">;
+def VST3d32_UPD : VST3DWB<0b0100, 0b1000, "32">;
+def VST3d64_UPD : NLdSt<0,0b00,0b0110,0b1100, (outs GPR:$wb),
+                      (ins addrmode6:$addr, am6offset:$offset,
+                       DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST,
+                      "vst1", "64", "\\{$src1, $src2, $src3\\}, $addr$offset",
+                      "$addr.addr = $wb", []>;
+
+// ...with double-spaced registers (non-updating versions for disassembly only):
+def VST3q8      : VST3D<0b0101, 0b0000, "8">;
+def VST3q16     : VST3D<0b0101, 0b0100, "16">;
+def VST3q32     : VST3D<0b0101, 0b1000, "32">;
+def VST3q8_UPD  : VST3DWB<0b0101, 0b0000, "8">;
+def VST3q16_UPD : VST3DWB<0b0101, 0b0100, "16">;
+def VST3q32_UPD : VST3DWB<0b0101, 0b1000, "32">;
+
+// ...alternate versions to be allocated odd register numbers:
+def VST3q8odd_UPD  : VST3DWB<0b0101, 0b0000, "8">;
+def VST3q16odd_UPD : VST3DWB<0b0101, 0b0100, "16">;
+def VST3q32odd_UPD : VST3DWB<0b0101, 0b1000, "32">;
 
 //   VST4     : Vector Store (multiple 4-element structures)
-class VST4D<bits<4> op7_4, string OpcodeStr, string Dt>
-  : NLdSt<0,0b00,0b0000,op7_4, (outs),
+class VST4D<bits<4> op11_8, bits<4> op7_4, string Dt>
+  : NLdSt<0, 0b00, op11_8, op7_4, (outs),
           (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4),
-          IIC_VST, OpcodeStr, Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr",
+          IIC_VST, "vst4", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr",
           "", []>;
-class VST4WB<bits<4> op7_4, string OpcodeStr, string Dt>
-  : NLdSt<0,0b00,0b0001,op7_4, (outs GPR:$wb),
-          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4),
-          IIC_VST, OpcodeStr, Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr",
-          "$addr.addr = $wb", []>;
 
-def  VST4d8   : VST4D<0b0000, "vst4", "8">;
-def  VST4d16  : VST4D<0b0100, "vst4", "16">;
-def  VST4d32  : VST4D<0b1000, "vst4", "32">;
+def  VST4d8   : VST4D<0b0000, 0b0000, "8">;
+def  VST4d16  : VST4D<0b0000, 0b0100, "16">;
+def  VST4d32  : VST4D<0b0000, 0b1000, "32">;
 def  VST4d64  : NLdSt<0,0b00,0b0010,0b1100, (outs),
                       (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3,
                        DPR:$src4), IIC_VST,
                       "vst1", "64", "\\{$src1, $src2, $src3, $src4\\}, $addr",
                       "", []>;
 
-// vst4 to double-spaced even registers.
-def  VST4q8a  : VST4WB<0b0000, "vst4", "8">;
-def  VST4q16a : VST4WB<0b0100, "vst4", "16">;
-def  VST4q32a : VST4WB<0b1000, "vst4", "32">;
+// ...with address register writeback:
+class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+  : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
+          (ins addrmode6:$addr, am6offset:$offset,
+           DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST,
+           "vst4", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr$offset",
+          "$addr.addr = $wb", []>;
 
-// vst4 to double-spaced odd registers.
-def  VST4q8b  : VST4WB<0b0000, "vst4", "8">;
-def  VST4q16b : VST4WB<0b0100, "vst4", "16">;
-def  VST4q32b : VST4WB<0b1000, "vst4", "32">;
+def VST4d8_UPD  : VST4DWB<0b0000, 0b0000, "8">;
+def VST4d16_UPD : VST4DWB<0b0000, 0b0100, "16">;
+def VST4d32_UPD : VST4DWB<0b0000, 0b1000, "32">;
+def VST4d64_UPD : NLdSt<0,0b00,0b0010,0b1100, (outs GPR:$wb),
+                      (ins addrmode6:$addr, am6offset:$offset,
+                       DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST,
+                      "vst1", "64",
+                      "\\{$src1, $src2, $src3, $src4\\}, $addr$offset",
+                      "$addr.addr = $wb", []>;
+
+// ...with double-spaced registers (non-updating versions for disassembly only):
+def VST4q8      : VST4D<0b0001, 0b0000, "8">;
+def VST4q16     : VST4D<0b0001, 0b0100, "16">;
+def VST4q32     : VST4D<0b0001, 0b1000, "32">;
+def VST4q8_UPD  : VST4DWB<0b0001, 0b0000, "8">;
+def VST4q16_UPD : VST4DWB<0b0001, 0b0100, "16">;
+def VST4q32_UPD : VST4DWB<0b0001, 0b1000, "32">;
+
+// ...alternate versions to be allocated odd register numbers:
+def VST4q8odd_UPD  : VST4DWB<0b0001, 0b0000, "8">;
+def VST4q16odd_UPD : VST4DWB<0b0001, 0b0100, "16">;
+def VST4q32odd_UPD : VST4DWB<0b0001, 0b1000, "32">;
 
 //   VST1LN   : Vector Store (single element from one lane)
 //   FIXME: Not yet implemented.
 
 //   VST2LN   : Vector Store (single 2-element structure from one lane)
-class VST2LN<bits<4> op11_8, string OpcodeStr, string Dt>
-  : NLdSt<1,0b00,op11_8,{?,?,?,?}, (outs),
+class VST2LN<bits<4> op11_8, string Dt>
+  : NLdSt<1, 0b00, op11_8, {?,?,?,?}, (outs),
           (ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane),
-          IIC_VST, OpcodeStr, Dt, "\\{$src1[$lane], $src2[$lane]\\}, $addr",
+          IIC_VST, "vst2", Dt, "\\{$src1[$lane], $src2[$lane]\\}, $addr",
           "", []>;
 
-// vst2 to single-spaced registers.
-def VST2LNd8  : VST2LN<0b0001, "vst2", "8">;
-def VST2LNd16 : VST2LN<0b0101, "vst2", "16"> { let Inst{5} = 0; }
-def VST2LNd32 : VST2LN<0b1001, "vst2", "32"> { let Inst{6} = 0; }
+def VST2LNd8  : VST2LN<0b0001, "8">;
+def VST2LNd16 : VST2LN<0b0101, "16"> { let Inst{5} = 0; }
+def VST2LNd32 : VST2LN<0b1001, "32"> { let Inst{6} = 0; }
+
+// ...with double-spaced registers:
+def VST2LNq16 : VST2LN<0b0101, "16"> { let Inst{5} = 1; }
+def VST2LNq32 : VST2LN<0b1001, "32"> { let Inst{6} = 1; }
 
-// vst2 to double-spaced even registers.
-def VST2LNq16a: VST2LN<0b0101, "vst2", "16"> { let Inst{5} = 1; }
-def VST2LNq32a: VST2LN<0b1001, "vst2", "32"> { let Inst{6} = 1; }
+// ...alternate versions to be allocated odd register numbers:
+def VST2LNq16odd : VST2LN<0b0101, "16"> { let Inst{5} = 1; }
+def VST2LNq32odd : VST2LN<0b1001, "32"> { let Inst{6} = 1; }
+
+// ...with address register writeback:
+class VST2LNWB<bits<4> op11_8, string Dt>
+  : NLdSt<1, 0b00, op11_8, {?,?,?,?}, (outs GPR:$wb),
+          (ins addrmode6:$addr, am6offset:$offset,
+           DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VST, "vst2", Dt,
+          "\\{$src1[$lane], $src2[$lane]\\}, $addr$offset",
+          "$addr.addr = $wb", []>;
 
-// vst2 to double-spaced odd registers.
-def VST2LNq16b: VST2LN<0b0101, "vst2", "16"> { let Inst{5} = 1; }
-def VST2LNq32b: VST2LN<0b1001, "vst2", "32"> { let Inst{6} = 1; }
+def VST2LNd8_UPD  : VST2LNWB<0b0001, "8">;
+def VST2LNd16_UPD : VST2LNWB<0b0101, "16"> { let Inst{5} = 0; }
+def VST2LNd32_UPD : VST2LNWB<0b1001, "32"> { let Inst{6} = 0; }
+
+def VST2LNq16_UPD : VST2LNWB<0b0101, "16"> { let Inst{5} = 1; }
+def VST2LNq32_UPD : VST2LNWB<0b1001, "32"> { let Inst{6} = 1; }
 
 //   VST3LN   : Vector Store (single 3-element structure from one lane)
-class VST3LN<bits<4> op11_8, string OpcodeStr, string Dt>
-  : NLdSt<1,0b00,op11_8,{?,?,?,?}, (outs),
+class VST3LN<bits<4> op11_8, string Dt>
+  : NLdSt<1, 0b00, op11_8, {?,?,?,?}, (outs),
           (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3,
-           nohash_imm:$lane), IIC_VST, OpcodeStr, Dt,
+           nohash_imm:$lane), IIC_VST, "vst3", Dt,
           "\\{$src1[$lane], $src2[$lane], $src3[$lane]\\}, $addr", "", []>;
 
-// vst3 to single-spaced registers.
-def VST3LNd8  : VST3LN<0b0010, "vst3", "8"> { let Inst{4} = 0; }
-def VST3LNd16 : VST3LN<0b0110, "vst3", "16"> { let Inst{5-4} = 0b00; }
-def VST3LNd32 : VST3LN<0b1010, "vst3", "32"> { let Inst{6-4} = 0b000; }
+def VST3LNd8  : VST3LN<0b0010, "8"> { let Inst{4} = 0; }
+def VST3LNd16 : VST3LN<0b0110, "16"> { let Inst{5-4} = 0b00; }
+def VST3LNd32 : VST3LN<0b1010, "32"> { let Inst{6-4} = 0b000; }
+
+// ...with double-spaced registers:
+def VST3LNq16 : VST3LN<0b0110, "16"> { let Inst{5-4} = 0b10; }
+def VST3LNq32 : VST3LN<0b1010, "32"> { let Inst{6-4} = 0b100; }
+
+// ...alternate versions to be allocated odd register numbers:
+def VST3LNq16odd : VST3LN<0b0110, "16"> { let Inst{5-4} = 0b10; }
+def VST3LNq32odd : VST3LN<0b1010, "32"> { let Inst{6-4} = 0b100; }
+
+// ...with address register writeback:
+class VST3LNWB<bits<4> op11_8, string Dt>
+  : NLdSt<1, 0b00, op11_8, {?,?,?,?}, (outs GPR:$wb),
+          (ins addrmode6:$addr, am6offset:$offset,
+           DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane),
+          IIC_VST, "vst3", Dt,
+          "\\{$src1[$lane], $src2[$lane], $src3[$lane]\\}, $addr$offset",
+          "$addr.addr = $wb", []>;
 
-// vst3 to double-spaced even registers.
-def VST3LNq16a: VST3LN<0b0110, "vst3", "16"> { let Inst{5-4} = 0b10; }
-def VST3LNq32a: VST3LN<0b1010, "vst3", "32"> { let Inst{6-4} = 0b100; }
+def VST3LNd8_UPD  : VST3LNWB<0b0010, "8"> { let Inst{4} = 0; }
+def VST3LNd16_UPD : VST3LNWB<0b0110, "16"> { let Inst{5-4} = 0b00; }
+def VST3LNd32_UPD : VST3LNWB<0b1010, "32"> { let Inst{6-4} = 0b000; }
 
-// vst3 to double-spaced odd registers.
-def VST3LNq16b: VST3LN<0b0110, "vst3", "16"> { let Inst{5-4} = 0b10; }
-def VST3LNq32b: VST3LN<0b1010, "vst3", "32"> { let Inst{6-4} = 0b100; }
+def VST3LNq16_UPD : VST3LNWB<0b0110, "16"> { let Inst{5-4} = 0b10; }
+def VST3LNq32_UPD : VST3LNWB<0b1010, "32"> { let Inst{6-4} = 0b100; }
 
 //   VST4LN   : Vector Store (single 4-element structure from one lane)
-class VST4LN<bits<4> op11_8, string OpcodeStr, string Dt>
-  : NLdSt<1,0b00,op11_8,{?,?,?,?}, (outs),
+class VST4LN<bits<4> op11_8, string Dt>
+  : NLdSt<1, 0b00, op11_8, {?,?,?,?}, (outs),
           (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
-           nohash_imm:$lane), IIC_VST, OpcodeStr, Dt,
+           nohash_imm:$lane), IIC_VST, "vst4", Dt,
           "\\{$src1[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $addr",
           "", []>;
 
-// vst4 to single-spaced registers.
-def VST4LNd8  : VST4LN<0b0011, "vst4", "8">;
-def VST4LNd16 : VST4LN<0b0111, "vst4", "16"> { let Inst{5} = 0; }
-def VST4LNd32 : VST4LN<0b1011, "vst4", "32"> { let Inst{6} = 0; }
+def VST4LNd8  : VST4LN<0b0011, "8">;
+def VST4LNd16 : VST4LN<0b0111, "16"> { let Inst{5} = 0; }
+def VST4LNd32 : VST4LN<0b1011, "32"> { let Inst{6} = 0; }
+
+// ...with double-spaced registers:
+def VST4LNq16 : VST4LN<0b0111, "16"> { let Inst{5} = 1; }
+def VST4LNq32 : VST4LN<0b1011, "32"> { let Inst{6} = 1; }
+
+// ...alternate versions to be allocated odd register numbers:
+def VST4LNq16odd : VST4LN<0b0111, "16"> { let Inst{5} = 1; }
+def VST4LNq32odd : VST4LN<0b1011, "32"> { let Inst{6} = 1; }
+
+// ...with address register writeback:
+class VST4LNWB<bits<4> op11_8, string Dt>
+  : NLdSt<1, 0b00, op11_8, {?,?,?,?}, (outs GPR:$wb),
+          (ins addrmode6:$addr, am6offset:$offset,
+           DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
+          IIC_VST, "vst4", Dt,
+  "\\{$src1[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $addr$offset",
+          "$addr.addr = $wb", []>;
 
-// vst4 to double-spaced even registers.
-def VST4LNq16a: VST4LN<0b0111, "vst4", "16"> { let Inst{5} = 1; }
-def VST4LNq32a: VST4LN<0b1011, "vst4", "32"> { let Inst{6} = 1; }
+def VST4LNd8_UPD  : VST4LNWB<0b0011, "8">;
+def VST4LNd16_UPD : VST4LNWB<0b0111, "16"> { let Inst{5} = 0; }
+def VST4LNd32_UPD : VST4LNWB<0b1011, "32"> { let Inst{6} = 0; }
 
-// vst4 to double-spaced odd registers.
-def VST4LNq16b: VST4LN<0b0111, "vst4", "16"> { let Inst{5} = 1; }
-def VST4LNq32b: VST4LN<0b1011, "vst4", "32"> { let Inst{6} = 1; }
+def VST4LNq16_UPD : VST4LNWB<0b0111, "16"> { let Inst{5} = 1; }
+def VST4LNq32_UPD : VST4LNWB<0b1011, "32"> { let Inst{6} = 1; }
 
 } // mayStore = 1, hasExtraSrcRegAllocReq = 1
 
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index 37c9fc5..e3ca536 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -16,7 +16,8 @@
 //
 
 def ARMtcall : SDNode<"ARMISD::tCALL", SDT_ARMcall,
-                      [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+                      [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag,
+                       SDNPVariadic]>;
 
 def imm_neg_XFORM : SDNodeXForm<imm, [{
   return CurDAG->getTargetConstant(-(int)N->getZExtValue(), MVT::i32);
@@ -549,7 +550,7 @@ def tLDM : T1I<(outs),
 def tLDM_UPD : T1It<(outs tGPR:$wb),
                     (ins addrmode4:$addr, pred:$p, reglist:$dsts, variable_ops),
                     IIC_iLoadm,
-                    "ldm${addr:submode}${p}\t$addr, $dsts",
+                    "ldm${addr:submode}${p}\t$addr!, $dsts",
                     "$addr.addr = $wb", []>,
                T1Encoding<{1,1,0,0,1,?}>; // A6.2 & A8.6.53
 } // mayLoad, hasExtraDefRegAllocReq
@@ -558,7 +559,7 @@ let mayStore = 1, hasExtraSrcRegAllocReq = 1 in
 def tSTM_UPD : T1It<(outs tGPR:$wb),
                     (ins addrmode4:$addr, pred:$p, reglist:$srcs, variable_ops),
                     IIC_iStorem,
-                    "stm${addr:submode}${p}\t$addr, $srcs",
+                    "stm${addr:submode}${p}\t$addr!, $srcs",
                     "$addr.addr = $wb", []>,
            T1Encoding<{1,1,0,0,0,?}>; // A6.2 & A8.6.189
 
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index ab9e926..262aae4 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -1218,7 +1218,7 @@ def t2LDM : T2XI<(outs), (ins addrmode4:$addr, pred:$p,
 
 def t2LDM_UPD : T2XIt<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
                                        reglist:$dsts, variable_ops), IIC_iLoadm,
-                      "ldm${addr:submode}${p}${addr:wide}\t$addr, $dsts",
+                      "ldm${addr:submode}${p}${addr:wide}\t$addr!, $dsts",
                       "$addr.addr = $wb", []> {
   let Inst{31-27} = 0b11101;
   let Inst{26-25} = 0b00;
@@ -1244,7 +1244,7 @@ def t2STM : T2XI<(outs), (ins addrmode4:$addr, pred:$p,
 def t2STM_UPD : T2XIt<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
                                        reglist:$srcs, variable_ops),
                       IIC_iStorem,
-                      "stm${addr:submode}${p}${addr:wide}\t$addr, $srcs",
+                      "stm${addr:submode}${p}${addr:wide}\t$addr!, $srcs",
                       "$addr.addr = $wb", []> {
   let Inst{31-27} = 0b11101;
   let Inst{26-25} = 0b00;
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index 4d1d48a..aca8230 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -25,8 +25,6 @@ def arm_ftoui  : SDNode<"ARMISD::FTOUI",  SDT_FTOI>;
 def arm_ftosi  : SDNode<"ARMISD::FTOSI",  SDT_FTOI>;
 def arm_sitof  : SDNode<"ARMISD::SITOF",  SDT_ITOF>;
 def arm_uitof  : SDNode<"ARMISD::UITOF",  SDT_ITOF>;
-def arm_f16tof32 : SDNode<"ARMISD::F16_TO_F32", SDT_ITOF>;
-def arm_f32tof16 : SDNode<"ARMISD::F32_TO_F16", SDT_FTOI>;
 def arm_fmstat : SDNode<"ARMISD::FMSTAT", SDTNone, [SDNPInFlag,SDNPOutFlag]>;
 def arm_cmpfp  : SDNode<"ARMISD::CMPFP",  SDT_ARMCmp, [SDNPOutFlag]>;
 def arm_cmpfp0 : SDNode<"ARMISD::CMPFPw0",SDT_CMPFP0, [SDNPOutFlag]>;
@@ -94,7 +92,7 @@ def VLDMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$dsts,
 def VLDMD_UPD : AXDI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p,
                                        reglist:$dsts, variable_ops),
                       IndexModeUpd, IIC_fpLoadm,
-                      "vldm${addr:submode}${p}\t${addr:base}, $dsts",
+                      "vldm${addr:submode}${p}\t${addr:base}!, $dsts",
                       "$addr.base = $wb", []> {
   let Inst{20} = 1;
 }
@@ -102,7 +100,7 @@ def VLDMD_UPD : AXDI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p,
 def VLDMS_UPD : AXSI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p,
                                        reglist:$dsts, variable_ops),
                       IndexModeUpd, IIC_fpLoadm, 
-                      "vldm${addr:submode}${p}\t${addr:base}, $dsts",
+                      "vldm${addr:submode}${p}\t${addr:base}!, $dsts",
                       "$addr.base = $wb", []> {
   let Inst{20} = 1;
 }
@@ -124,7 +122,7 @@ def VSTMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$srcs,
 def VSTMD_UPD : AXDI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p,
                                        reglist:$srcs, variable_ops),
                       IndexModeUpd, IIC_fpStorem,
-                      "vstm${addr:submode}${p}\t${addr:base}, $srcs",
+                      "vstm${addr:submode}${p}\t${addr:base}!, $srcs",
                       "$addr.base = $wb", []> {
   let Inst{20} = 0;
 }
@@ -132,7 +130,7 @@ def VSTMD_UPD : AXDI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p,
 def VSTMS_UPD : AXSI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p,
                                        reglist:$srcs, variable_ops),
                       IndexModeUpd, IIC_fpStorem,
-                      "vstm${addr:submode}${p}\t${addr:base}, $srcs",
+                      "vstm${addr:submode}${p}\t${addr:base}!, $srcs",
                       "$addr.base = $wb", []> {
   let Inst{20} = 0;
 }
@@ -259,11 +257,17 @@ def VCVTSD : VFPAI<(outs SPR:$dst), (ins DPR:$a), VFPUnaryFrm,
 
 def VCVTBSH : ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$dst), (ins SPR:$a),
                  /* FIXME */ IIC_fpCVTDS, "vcvtb", ".f32.f16\t$dst, $a",
-                 [(set SPR:$dst, (f32 (arm_f32tof16 SPR:$a)))]>;
+                 [/* For disassembly only; pattern left blank */]>;
+
+def : ARMPat<(f32_to_f16 SPR:$a),
+             (i32 (COPY_TO_REGCLASS (VCVTBSH SPR:$a), GPR))>;
 
 def VCVTBHS : ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$dst), (ins SPR:$a),
                  /* FIXME */ IIC_fpCVTDS, "vcvtb", ".f16.f32\t$dst, $a",
-                 [(set SPR:$dst, (arm_f16tof32 SPR:$a))]>;
+                 [/* For disassembly only; pattern left blank */]>;
+
+def : ARMPat<(f16_to_f32 GPR:$a),
+             (VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>;
 
 def VCVTTSH : ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$dst), (ins SPR:$a),
                  /* FIXME */ IIC_fpCVTDS, "vcvtt", ".f32.f16\t$dst, $a",
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index 8fbcf45..bdbec30 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -253,7 +253,7 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
         .addImm(ARM_AM::getAM4ModeImm(Mode)).addImm(Pred).addReg(PredReg)
     : BuildMI(MBB, MBBI, dl, TII->get(Opcode))
         .addReg(Base, getKillRegState(BaseKill))
-        .addImm(ARM_AM::getAM5Opc(Mode, false, isDPR ? NumRegs<<1 : NumRegs))
+        .addImm(ARM_AM::getAM5Opc(Mode, isDPR ? NumRegs<<1 : NumRegs))
         .addImm(Pred).addReg(PredReg);
   for (unsigned i = 0; i != NumRegs; ++i)
     MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
@@ -505,11 +505,9 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
       if (MI->getOperand(i).getReg() == Base)
         return false;
     }
-    assert(!ARM_AM::getAM4WBFlag(MI->getOperand(1).getImm()));
     Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm());
   } else {
     // VLDM{D|S}, VSTM{D|S} addressing mode 5 ops.
-    assert(!ARM_AM::getAM5WBFlag(MI->getOperand(1).getImm()));
     Mode = ARM_AM::getAM5SubMode(MI->getOperand(1).getImm());
     Offset = ARM_AM::getAM5Offset(MI->getOperand(1).getImm());
   }
@@ -573,11 +571,11 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
     .addReg(Base, getKillRegState(BaseKill));
   if (isAM4) {
     // [t2]LDM_UPD, [t2]STM_UPD
-    MIB.addImm(ARM_AM::getAM4ModeImm(Mode, true))
+    MIB.addImm(ARM_AM::getAM4ModeImm(Mode))
       .addImm(Pred).addReg(PredReg);
   } else {
     // VLDM[SD}_UPD, VSTM[SD]_UPD
-    MIB.addImm(ARM_AM::getAM5Opc(Mode, true, Offset))
+    MIB.addImm(ARM_AM::getAM5Opc(Mode, Offset))
       .addImm(Pred).addReg(PredReg);
   }
   // Transfer the rest of operands.
@@ -709,7 +707,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
   unsigned Offset = 0;
   if (isAM5)
     Offset = ARM_AM::getAM5Opc(AddSub == ARM_AM::sub ? ARM_AM::db : ARM_AM::ia,
-                               true, (isDPR ? 2 : 1));
+                               (isDPR ? 2 : 1));
   else if (isAM2)
     Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
   else
@@ -1157,19 +1155,24 @@ namespace {
   };
 }
 
-/// MergeReturnIntoLDM - If this is a exit BB, try merging the return op
-/// (bx lr) into the preceeding stack restore so it directly restore the value
-/// of LR into pc.
-///   ldmfd sp!, {r7, lr}
+/// MergeReturnIntoLDM - If this is a exit BB, try merging the return ops
+/// ("bx lr" and "mov pc, lr") into the preceeding stack restore so it
+/// directly restore the value of LR into pc.
+///   ldmfd sp!, {..., lr}
 ///   bx lr
+/// or
+///   ldmfd sp!, {..., lr}
+///   mov pc, lr
 /// =>
-///   ldmfd sp!, {r7, pc}
+///   ldmfd sp!, {..., pc}
 bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
   if (MBB.empty()) return false;
 
   MachineBasicBlock::iterator MBBI = prior(MBB.end());
   if (MBBI != MBB.begin() &&
-      (MBBI->getOpcode() == ARM::BX_RET || MBBI->getOpcode() == ARM::tBX_RET)) {
+      (MBBI->getOpcode() == ARM::BX_RET ||
+       MBBI->getOpcode() == ARM::tBX_RET ||
+       MBBI->getOpcode() == ARM::MOVPCLR)) {
     MachineInstr *PrevMI = prior(MBBI);
     if (PrevMI->getOpcode() == ARM::LDM_UPD ||
         PrevMI->getOpcode() == ARM::t2LDM_UPD) {
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index 7233f5c..95f57b7 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -21,7 +21,7 @@
 #include "llvm/Target/TargetRegistry.h"
 using namespace llvm;
 
-static const MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
+static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
   Triple TheTriple(TT);
   switch (TheTriple.getOS()) {
   case Triple::Darwin:
diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h
index 88e67e3..c32f16c 100644
--- a/lib/Target/ARM/ARMTargetMachine.h
+++ b/lib/Target/ARM/ARMTargetMachine.h
@@ -23,6 +23,7 @@
 #include "ARMISelLowering.h"
 #include "Thumb1InstrInfo.h"
 #include "Thumb2InstrInfo.h"
+#include "llvm/ADT/OwningPtr.h"
 
 namespace llvm {
 
@@ -83,7 +84,8 @@ public:
 ///   Thumb-1 and Thumb-2.
 ///
 class ThumbTargetMachine : public ARMBaseTargetMachine {
-  ARMBaseInstrInfo    *InstrInfo;   // either Thumb1InstrInfo or Thumb2InstrInfo
+  // Either Thumb1InstrInfo or Thumb2InstrInfo.
+  OwningPtr<ARMBaseInstrInfo> InstrInfo;
   const TargetData    DataLayout;   // Calculates type size & alignment
   ARMTargetLowering   TLInfo;
 public:
@@ -100,7 +102,9 @@ public:
   }
 
   /// returns either Thumb1InstrInfo or Thumb2InstrInfo
-  virtual const ARMBaseInstrInfo *getInstrInfo() const { return InstrInfo; }
+  virtual const ARMBaseInstrInfo *getInstrInfo() const {
+    return InstrInfo.get();
+  }
   virtual const TargetData       *getTargetData() const { return &DataLayout; }
 };
 
diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
index 4db14a3..4a7a1e4 100644
--- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
@@ -101,6 +101,7 @@ namespace {
     void printAddrMode5Operand(const MachineInstr *MI, int OpNum,
                                const char *Modifier = 0);
     void printAddrMode6Operand(const MachineInstr *MI, int OpNum);
+    void printAddrMode6OffsetOperand(const MachineInstr *MI, int OpNum);
     void printAddrModePCOperand(const MachineInstr *MI, int OpNum,
                                 const char *Modifier = 0);
     void printBitfieldInvMaskImmOperand (const MachineInstr *MI, int OpNum);
@@ -431,16 +432,16 @@ void ARMAsmPrinter::printAddrMode2Operand(const MachineInstr *MI, int Op) {
   O << "[" << getRegisterName(MO1.getReg());
 
   if (!MO2.getReg()) {
-    if (ARM_AM::getAM2Offset(MO3.getImm()))  // Don't print +0.
+    if (ARM_AM::getAM2Offset(MO3.getImm())) // Don't print +0.
       O << ", #"
-        << (char)ARM_AM::getAM2Op(MO3.getImm())
+        << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm()))
         << ARM_AM::getAM2Offset(MO3.getImm());
     O << "]";
     return;
   }
 
   O << ", "
-    << (char)ARM_AM::getAM2Op(MO3.getImm())
+    << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm()))
     << getRegisterName(MO2.getReg());
 
   if (unsigned ShImm = ARM_AM::getAM2Offset(MO3.getImm()))
@@ -458,12 +459,12 @@ void ARMAsmPrinter::printAddrMode2OffsetOperand(const MachineInstr *MI, int Op){
     unsigned ImmOffs = ARM_AM::getAM2Offset(MO2.getImm());
     assert(ImmOffs && "Malformed indexed load / store!");
     O << "#"
-      << (char)ARM_AM::getAM2Op(MO2.getImm())
+      << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm()))
       << ImmOffs;
     return;
   }
 
-  O << (char)ARM_AM::getAM2Op(MO2.getImm())
+  O << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm()))
     << getRegisterName(MO1.getReg());
 
   if (unsigned ShImm = ARM_AM::getAM2Offset(MO2.getImm()))
@@ -490,7 +491,7 @@ void ARMAsmPrinter::printAddrMode3Operand(const MachineInstr *MI, int Op) {
 
   if (unsigned ImmOffs = ARM_AM::getAM3Offset(MO3.getImm()))
     O << ", #"
-      << (char)ARM_AM::getAM3Op(MO3.getImm())
+      << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO3.getImm()))
       << ImmOffs;
   O << "]";
 }
@@ -508,35 +509,22 @@ void ARMAsmPrinter::printAddrMode3OffsetOperand(const MachineInstr *MI, int Op){
   unsigned ImmOffs = ARM_AM::getAM3Offset(MO2.getImm());
   assert(ImmOffs && "Malformed indexed load / store!");
   O << "#"
-    << (char)ARM_AM::getAM3Op(MO2.getImm())
+    << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO2.getImm()))
     << ImmOffs;
 }
 
 void ARMAsmPrinter::printAddrMode4Operand(const MachineInstr *MI, int Op,
                                           const char *Modifier) {
-  const MachineOperand &MO1 = MI->getOperand(Op);
   const MachineOperand &MO2 = MI->getOperand(Op+1);
   ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MO2.getImm());
   if (Modifier && strcmp(Modifier, "submode") == 0) {
-    if (MO1.getReg() == ARM::SP) {
-      // FIXME
-      bool isLDM = (MI->getOpcode() == ARM::LDM ||
-                    MI->getOpcode() == ARM::LDM_UPD ||
-                    MI->getOpcode() == ARM::LDM_RET ||
-                    MI->getOpcode() == ARM::t2LDM ||
-                    MI->getOpcode() == ARM::t2LDM_UPD ||
-                    MI->getOpcode() == ARM::t2LDM_RET);
-      O << ARM_AM::getAMSubModeAltStr(Mode, isLDM);
-    } else
-      O << ARM_AM::getAMSubModeStr(Mode);
+    O << ARM_AM::getAMSubModeStr(Mode);
   } else if (Modifier && strcmp(Modifier, "wide") == 0) {
     ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MO2.getImm());
     if (Mode == ARM_AM::ia)
       O << ".w";
   } else {
     printOperand(MI, Op);
-    if (ARM_AM::getAM4WBFlag(MO2.getImm()))
-      O << "!";
   }
 }
 
@@ -559,8 +547,6 @@ void ARMAsmPrinter::printAddrMode5Operand(const MachineInstr *MI, int Op,
   } else if (Modifier && strcmp(Modifier, "base") == 0) {
     // Used for FSTM{D|S} and LSTM{D|S} operations.
     O << getRegisterName(MO1.getReg());
-    if (ARM_AM::getAM5WBFlag(MO2.getImm()))
-      O << "!";
     return;
   }
 
@@ -568,7 +554,7 @@ void ARMAsmPrinter::printAddrMode5Operand(const MachineInstr *MI, int Op,
 
   if (unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm())) {
     O << ", #"
-      << (char)ARM_AM::getAM5Op(MO2.getImm())
+      << ARM_AM::getAddrOpcStr(ARM_AM::getAM5Op(MO2.getImm()))
       << ImmOffs*4;
   }
   O << "]";
@@ -577,22 +563,21 @@ void ARMAsmPrinter::printAddrMode5Operand(const MachineInstr *MI, int Op,
 void ARMAsmPrinter::printAddrMode6Operand(const MachineInstr *MI, int Op) {
   const MachineOperand &MO1 = MI->getOperand(Op);
   const MachineOperand &MO2 = MI->getOperand(Op+1);
-  const MachineOperand &MO3 = MI->getOperand(Op+2);
-  const MachineOperand &MO4 = MI->getOperand(Op+3);
 
   O << "[" << getRegisterName(MO1.getReg());
-  if (MO4.getImm()) {
+  if (MO2.getImm()) {
     // FIXME: Both darwin as and GNU as violate ARM docs here.
-    O << ", :" << MO4.getImm();
+    O << ", :" << MO2.getImm();
   }
   O << "]";
+}
 
-  if (ARM_AM::getAM6WBFlag(MO3.getImm())) {
-    if (MO2.getReg() == 0)
-      O << "!";
-    else
-      O << ", " << getRegisterName(MO2.getReg());
-  }
+void ARMAsmPrinter::printAddrMode6OffsetOperand(const MachineInstr *MI, int Op){
+  const MachineOperand &MO = MI->getOperand(Op);
+  if (MO.getReg() == 0)
+    O << "!";
+  else
+    O << ", " << getRegisterName(MO.getReg());
 }
 
 void ARMAsmPrinter::printAddrModePCOperand(const MachineInstr *MI, int Op,
@@ -604,7 +589,7 @@ void ARMAsmPrinter::printAddrModePCOperand(const MachineInstr *MI, int Op,
 
   const MachineOperand &MO1 = MI->getOperand(Op);
   assert(TargetRegisterInfo::isPhysicalRegister(MO1.getReg()));
-  O << "[pc, +" << getRegisterName(MO1.getReg()) << "]";
+  O << "[pc, " << getRegisterName(MO1.getReg()) << "]";
 }
 
 void
@@ -627,10 +612,11 @@ void
 ARMAsmPrinter::printThumbITMask(const MachineInstr *MI, int Op) {
   // (3 - the number of trailing zeros) is the number of then / else.
   unsigned Mask = MI->getOperand(Op).getImm();
+  unsigned CondBit0 = Mask >> 4 & 1;
   unsigned NumTZ = CountTrailingZeros_32(Mask);
   assert(NumTZ <= 3 && "Invalid IT mask!");
   for (unsigned Pos = 3, e = NumTZ; Pos > e; --Pos) {
-    bool T = (Mask & (1 << Pos)) == 0;
+    bool T = ((Mask >> Pos) & 1) == CondBit0;
     if (T)
       O << 't';
     else
@@ -662,7 +648,7 @@ ARMAsmPrinter::printThumbAddrModeRI5Operand(const MachineInstr *MI, int Op,
   if (MO3.getReg())
     O << ", " << getRegisterName(MO3.getReg());
   else if (unsigned ImmOffs = MO2.getImm())
-    O << ", #+" << ImmOffs * Scale;
+    O << ", #" << ImmOffs * Scale;
   O << "]";
 }
 
@@ -684,7 +670,7 @@ void ARMAsmPrinter::printThumbAddrModeSPOperand(const MachineInstr *MI,int Op) {
   const MachineOperand &MO2 = MI->getOperand(Op+1);
   O << "[" << getRegisterName(MO1.getReg());
   if (unsigned ImmOffs = MO2.getImm())
-    O << ", #+" << ImmOffs*4;
+    O << ", #" << ImmOffs*4;
   O << "]";
 }
 
@@ -720,7 +706,7 @@ void ARMAsmPrinter::printT2AddrModeImm12Operand(const MachineInstr *MI,
 
   unsigned OffImm = MO2.getImm();
   if (OffImm)  // Don't print +0.
-    O << ", #+" << OffImm;
+    O << ", #" << OffImm;
   O << "]";
 }
 
@@ -736,7 +722,7 @@ void ARMAsmPrinter::printT2AddrModeImm8Operand(const MachineInstr *MI,
   if (OffImm < 0)
     O << ", #-" << -OffImm;
   else if (OffImm > 0)
-    O << ", #+" << OffImm;
+    O << ", #" << OffImm;
   O << "]";
 }
 
@@ -752,7 +738,7 @@ void ARMAsmPrinter::printT2AddrModeImm8s4Operand(const MachineInstr *MI,
   if (OffImm < 0)
     O << ", #-" << -OffImm * 4;
   else if (OffImm > 0)
-    O << ", #+" << OffImm * 4;
+    O << ", #" << OffImm * 4;
   O << "]";
 }
 
@@ -764,7 +750,7 @@ void ARMAsmPrinter::printT2AddrModeImm8OffsetOperand(const MachineInstr *MI,
   if (OffImm < 0)
     O << "#-" << -OffImm;
   else if (OffImm > 0)
-    O << "#+" << OffImm;
+    O << "#" << OffImm;
 }
 
 void ARMAsmPrinter::printT2AddrModeSoRegOperand(const MachineInstr *MI,
diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp
index a2084b0..30763a9 100644
--- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp
+++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp
@@ -28,7 +28,159 @@ using namespace llvm;
 #undef MachineInstr
 #undef ARMAsmPrinter
 
-void ARMInstPrinter::printInst(const MCInst *MI) { printInstruction(MI); }
+static unsigned NextReg(unsigned Reg) {
+  switch (Reg) {
+  default:
+    assert(0 && "Unexpected register enum");
+
+  case ARM::D0:
+    return ARM::D1;
+  case ARM::D1:
+    return ARM::D2;
+  case ARM::D2:
+    return ARM::D3;
+  case ARM::D3:
+    return ARM::D4;
+  case ARM::D4:
+    return ARM::D5;
+  case ARM::D5:
+    return ARM::D6;
+  case ARM::D6:
+    return ARM::D7;
+  case ARM::D7:
+    return ARM::D8;
+  case ARM::D8:
+    return ARM::D9;
+  case ARM::D9:
+    return ARM::D10;
+  case ARM::D10:
+    return ARM::D11;
+  case ARM::D11:
+    return ARM::D12;
+  case ARM::D12:
+    return ARM::D13;
+  case ARM::D13:
+    return ARM::D14;
+  case ARM::D14:
+    return ARM::D15;
+  case ARM::D15:
+    return ARM::D16;
+  case ARM::D16:
+    return ARM::D17;
+  case ARM::D17:
+    return ARM::D18;
+  case ARM::D18:
+    return ARM::D19;
+  case ARM::D19:
+    return ARM::D20;
+  case ARM::D20:
+    return ARM::D21;
+  case ARM::D21:
+    return ARM::D22;
+  case ARM::D22:
+    return ARM::D23;
+  case ARM::D23:
+    return ARM::D24;
+  case ARM::D24:
+    return ARM::D25;
+  case ARM::D25:
+    return ARM::D26;
+  case ARM::D26:
+    return ARM::D27;
+  case ARM::D27:
+    return ARM::D28;
+  case ARM::D28:
+    return ARM::D29;
+  case ARM::D29:
+    return ARM::D30;
+  case ARM::D30:
+    return ARM::D31;
+  }
+}
+
+void ARMInstPrinter::printInst(const MCInst *MI) {
+  // Check for MOVs and print canonical forms, instead.
+  if (MI->getOpcode() == ARM::MOVs) {
+    const MCOperand &Dst = MI->getOperand(0);
+    const MCOperand &MO1 = MI->getOperand(1);
+    const MCOperand &MO2 = MI->getOperand(2);
+    const MCOperand &MO3 = MI->getOperand(3);
+
+    O << '\t' << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO3.getImm()));
+    printSBitModifierOperand(MI, 6);
+    printPredicateOperand(MI, 4);
+
+    O << '\t' << getRegisterName(Dst.getReg())
+      << ", " << getRegisterName(MO1.getReg());
+
+    if (ARM_AM::getSORegShOp(MO3.getImm()) == ARM_AM::rrx)
+      return;
+
+    O << ", ";
+
+    if (MO2.getReg()) {
+      O << getRegisterName(MO2.getReg());
+      assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0);
+    } else {
+      O << "#" << ARM_AM::getSORegOffset(MO3.getImm());
+    }
+    return;
+  }
+
+  // A8.6.123 PUSH
+  if ((MI->getOpcode() == ARM::STM_UPD || MI->getOpcode() == ARM::t2STM_UPD) &&
+      MI->getOperand(0).getReg() == ARM::SP) {
+    const MCOperand &MO1 = MI->getOperand(2);
+    if (ARM_AM::getAM4SubMode(MO1.getImm()) == ARM_AM::db) {
+      O << '\t' << "push";
+      printPredicateOperand(MI, 3);
+      O << '\t';
+      printRegisterList(MI, 5);
+      return;
+    }
+  }
+
+  // A8.6.122 POP
+  if ((MI->getOpcode() == ARM::LDM_UPD || MI->getOpcode() == ARM::t2LDM_UPD) &&
+      MI->getOperand(0).getReg() == ARM::SP) {
+    const MCOperand &MO1 = MI->getOperand(2);
+    if (ARM_AM::getAM4SubMode(MO1.getImm()) == ARM_AM::ia) {
+      O << '\t' << "pop";
+      printPredicateOperand(MI, 3);
+      O << '\t';
+      printRegisterList(MI, 5);
+      return;
+    }
+  }
+
+  // A8.6.355 VPUSH
+  if ((MI->getOpcode() == ARM::VSTMS_UPD || MI->getOpcode() ==ARM::VSTMD_UPD) &&
+      MI->getOperand(0).getReg() == ARM::SP) {
+    const MCOperand &MO1 = MI->getOperand(2);
+    if (ARM_AM::getAM5SubMode(MO1.getImm()) == ARM_AM::db) {
+      O << '\t' << "vpush";
+      printPredicateOperand(MI, 3);
+      O << '\t';
+      printRegisterList(MI, 5);
+      return;
+    }
+  }
+
+  // A8.6.354 VPOP
+  if ((MI->getOpcode() == ARM::VLDMS_UPD || MI->getOpcode() ==ARM::VLDMD_UPD) &&
+      MI->getOperand(0).getReg() == ARM::SP) {
+    const MCOperand &MO1 = MI->getOperand(2);
+    if (ARM_AM::getAM5SubMode(MO1.getImm()) == ARM_AM::ia) {
+      O << '\t' << "vpop";
+      printPredicateOperand(MI, 3);
+      O << '\t';
+      printRegisterList(MI, 5);
+      return;
+    }
+  }
+
+  printInstruction(MI);
+ }
 
 void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
                                   const char *Modifier) {
@@ -36,6 +188,9 @@ void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
   if (Op.isReg()) {
     unsigned Reg = Op.getReg();
     if (Modifier && strcmp(Modifier, "dregpair") == 0) {
+      O << '{' << getRegisterName(Reg) << ", "
+               << getRegisterName(NextReg(Reg)) << '}';
+#if 0
       // FIXME: Breaks e.g. ARM/vmul.ll.
       assert(0);
       /*
@@ -44,6 +199,7 @@ void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
       O << '{'
       << getRegisterName(DRegLo) << ',' << getRegisterName(DRegHi)
       << '}';*/
+#endif
     } else if (Modifier && strcmp(Modifier, "lane") == 0) {
       assert(0);
       /*
@@ -56,7 +212,8 @@ void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
       O << getRegisterName(Reg);
     }
   } else if (Op.isImm()) {
-    assert((Modifier == 0 || Modifier[0] == 0) && "No modifiers supported");
+    assert((Modifier && !strcmp(Modifier, "call")) ||
+           ((Modifier == 0 || Modifier[0] == 0) && "No modifiers supported"));
     O << '#' << Op.getImm();
   } else {
     assert((Modifier == 0 || Modifier[0] == 0) && "No modifiers supported");
@@ -142,17 +299,17 @@ void ARMInstPrinter::printAddrMode2Operand(const MCInst *MI, unsigned Op) {
   O << "[" << getRegisterName(MO1.getReg());
   
   if (!MO2.getReg()) {
-    if (ARM_AM::getAM2Offset(MO3.getImm()))  // Don't print +0.
+    if (ARM_AM::getAM2Offset(MO3.getImm())) // Don't print +0.
       O << ", #"
-      << (char)ARM_AM::getAM2Op(MO3.getImm())
-      << ARM_AM::getAM2Offset(MO3.getImm());
+        << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm()))
+        << ARM_AM::getAM2Offset(MO3.getImm());
     O << "]";
     return;
   }
   
   O << ", "
-  << (char)ARM_AM::getAM2Op(MO3.getImm())
-  << getRegisterName(MO2.getReg());
+    << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm()))
+    << getRegisterName(MO2.getReg());
   
   if (unsigned ShImm = ARM_AM::getAM2Offset(MO3.getImm()))
     O << ", "
@@ -169,11 +326,14 @@ void ARMInstPrinter::printAddrMode2OffsetOperand(const MCInst *MI,
   if (!MO1.getReg()) {
     unsigned ImmOffs = ARM_AM::getAM2Offset(MO2.getImm());
     assert(ImmOffs && "Malformed indexed load / store!");
-    O << '#' << (char)ARM_AM::getAM2Op(MO2.getImm()) << ImmOffs;
+    O << '#'
+      << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm()))
+      << ImmOffs;
     return;
   }
   
-  O << (char)ARM_AM::getAM2Op(MO2.getImm()) << getRegisterName(MO1.getReg());
+  O << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm()))
+    << getRegisterName(MO1.getReg());
   
   if (unsigned ShImm = ARM_AM::getAM2Offset(MO2.getImm()))
     O << ", "
@@ -196,8 +356,8 @@ void ARMInstPrinter::printAddrMode3Operand(const MCInst *MI, unsigned OpNum) {
   
   if (unsigned ImmOffs = ARM_AM::getAM3Offset(MO3.getImm()))
     O << ", #"
-    << (char)ARM_AM::getAM3Op(MO3.getImm())
-    << ImmOffs;
+      << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO3.getImm()))
+      << ImmOffs;
   O << ']';
 }
 
@@ -214,35 +374,24 @@ void ARMInstPrinter::printAddrMode3OffsetOperand(const MCInst *MI,
   
   unsigned ImmOffs = ARM_AM::getAM3Offset(MO2.getImm());
   assert(ImmOffs && "Malformed indexed load / store!");
-  O << "#"
-  << (char)ARM_AM::getAM3Op(MO2.getImm())
-  << ImmOffs;
+  O << '#'
+    << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO2.getImm()))
+    << ImmOffs;
 }
 
 
 void ARMInstPrinter::printAddrMode4Operand(const MCInst *MI, unsigned OpNum,
                                            const char *Modifier) {
-  const MCOperand &MO1 = MI->getOperand(OpNum);
   const MCOperand &MO2 = MI->getOperand(OpNum+1);
   ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MO2.getImm());
   if (Modifier && strcmp(Modifier, "submode") == 0) {
-    if (MO1.getReg() == ARM::SP) {
-      // FIXME
-      bool isLDM = (MI->getOpcode() == ARM::LDM ||
-                    MI->getOpcode() == ARM::LDM_RET ||
-                    MI->getOpcode() == ARM::t2LDM ||
-                    MI->getOpcode() == ARM::t2LDM_RET);
-      O << ARM_AM::getAMSubModeAltStr(Mode, isLDM);
-    } else
-      O << ARM_AM::getAMSubModeStr(Mode);
+    O << ARM_AM::getAMSubModeStr(Mode);
   } else if (Modifier && strcmp(Modifier, "wide") == 0) {
     ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MO2.getImm());
     if (Mode == ARM_AM::ia)
       O << ".w";
   } else {
     printOperand(MI, OpNum);
-    if (ARM_AM::getAM4WBFlag(MO2.getImm()))
-      O << "!";
   }
 }
 
@@ -263,8 +412,6 @@ void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum,
   } else if (Modifier && strcmp(Modifier, "base") == 0) {
     // Used for FSTM{D|S} and LSTM{D|S} operations.
     O << getRegisterName(MO1.getReg());
-    if (ARM_AM::getAM5WBFlag(MO2.getImm()))
-      O << "!";
     return;
   }
   
@@ -272,7 +419,7 @@ void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum,
   
   if (unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm())) {
     O << ", #"
-      << (char)ARM_AM::getAM5Op(MO2.getImm())
+      << ARM_AM::getAddrOpcStr(ARM_AM::getAM5Op(MO2.getImm()))
       << ImmOffs*4;
   }
   O << "]";
@@ -281,17 +428,22 @@ void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum,
 void ARMInstPrinter::printAddrMode6Operand(const MCInst *MI, unsigned OpNum) {
   const MCOperand &MO1 = MI->getOperand(OpNum);
   const MCOperand &MO2 = MI->getOperand(OpNum+1);
-  const MCOperand &MO3 = MI->getOperand(OpNum+2);
   
-  // FIXME: No support yet for specifying alignment.
-  O << '[' << getRegisterName(MO1.getReg()) << ']';
-  
-  if (ARM_AM::getAM6WBFlag(MO3.getImm())) {
-    if (MO2.getReg() == 0)
-      O << '!';
-    else
-      O << ", " << getRegisterName(MO2.getReg());
+  O << "[" << getRegisterName(MO1.getReg());
+  if (MO2.getImm()) {
+    // FIXME: Both darwin as and GNU as violate ARM docs here.
+    O << ", :" << MO2.getImm();
   }
+  O << "]";
+}
+
+void ARMInstPrinter::printAddrMode6OffsetOperand(const MCInst *MI,
+                                                 unsigned OpNum) {
+  const MCOperand &MO = MI->getOperand(OpNum);
+  if (MO.getReg() == 0)
+    O << "!";
+  else
+    O << ", " << getRegisterName(MO.getReg());
 }
 
 void ARMInstPrinter::printAddrModePCOperand(const MCInst *MI, unsigned OpNum,
@@ -311,14 +463,56 @@ void ARMInstPrinter::printBitfieldInvMaskImmOperand (const MCInst *MI,
 
 void ARMInstPrinter::printRegisterList(const MCInst *MI, unsigned OpNum) {
   O << "{";
-  // Always skip the first operand, it's the optional (and implicit writeback).
-  for (unsigned i = OpNum+1, e = MI->getNumOperands(); i != e; ++i) {
-    if (i != OpNum+1) O << ", ";
+  for (unsigned i = OpNum, e = MI->getNumOperands(); i != e; ++i) {
+    if (i != OpNum) O << ", ";
     O << getRegisterName(MI->getOperand(i).getReg());
   }
   O << "}";
 }
 
+void ARMInstPrinter::printCPSOptionOperand(const MCInst *MI, unsigned OpNum) {
+  const MCOperand &Op = MI->getOperand(OpNum);
+  unsigned option = Op.getImm();
+  unsigned mode = option & 31;
+  bool changemode = option >> 5 & 1;
+  unsigned AIF = option >> 6 & 7;
+  unsigned imod = option >> 9 & 3;
+  if (imod == 2)
+    O << "ie";
+  else if (imod == 3)
+    O << "id";
+  O << '\t';
+  if (imod > 1) {
+    if (AIF & 4) O << 'a';
+    if (AIF & 2) O << 'i';
+    if (AIF & 1) O << 'f';
+    if (AIF > 0 && changemode) O << ", ";
+  }
+  if (changemode)
+    O << '#' << mode;
+}
+
+void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum) {
+  const MCOperand &Op = MI->getOperand(OpNum);
+  unsigned Mask = Op.getImm();
+  if (Mask) {
+    O << '_';
+    if (Mask & 8) O << 'f';
+    if (Mask & 4) O << 's';
+    if (Mask & 2) O << 'x';
+    if (Mask & 1) O << 'c';
+  }
+}
+
+void ARMInstPrinter::printNegZeroOperand(const MCInst *MI, unsigned OpNum){
+  const MCOperand &Op = MI->getOperand(OpNum);
+  O << '#';
+  if (Op.getImm() < 0)
+    O << '-' << (-Op.getImm() - 1);
+  else
+    O << Op.getImm();
+}
+
 void ARMInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNum) {
   ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(OpNum).getImm();
   if (CC != ARMCC::AL)
@@ -360,3 +554,191 @@ void ARMInstPrinter::printPCLabel(const MCInst *MI, unsigned OpNum) {
 void ARMInstPrinter::printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum) {
   O << "#" <<  MI->getOperand(OpNum).getImm() * 4;
 }
+
+void ARMInstPrinter::printThumbITMask(const MCInst *MI, unsigned OpNum) {
+  // (3 - the number of trailing zeros) is the number of then / else.
+  unsigned Mask = MI->getOperand(OpNum).getImm();
+  unsigned CondBit0 = Mask >> 4 & 1;
+  unsigned NumTZ = CountTrailingZeros_32(Mask);
+  assert(NumTZ <= 3 && "Invalid IT mask!");
+  for (unsigned Pos = 3, e = NumTZ; Pos > e; --Pos) {
+    bool T = ((Mask >> Pos) & 1) == CondBit0;
+    if (T)
+      O << 't';
+    else
+      O << 'e';
+  }
+}
+
+void ARMInstPrinter::printThumbAddrModeRROperand(const MCInst *MI, unsigned Op)
+{
+  const MCOperand &MO1 = MI->getOperand(Op);
+  const MCOperand &MO2 = MI->getOperand(Op+1);
+  O << "[" << getRegisterName(MO1.getReg());
+  O << ", " << getRegisterName(MO2.getReg()) << "]";
+}
+
+void ARMInstPrinter::printThumbAddrModeRI5Operand(const MCInst *MI, unsigned Op,
+                                                  unsigned Scale) {
+  const MCOperand &MO1 = MI->getOperand(Op);
+  const MCOperand &MO2 = MI->getOperand(Op+1);
+  const MCOperand &MO3 = MI->getOperand(Op+2);
+
+  if (!MO1.isReg()) {   // FIXME: This is for CP entries, but isn't right.
+    printOperand(MI, Op);
+    return;
+  }
+
+  O << "[" << getRegisterName(MO1.getReg());
+  if (MO3.getReg())
+    O << ", " << getRegisterName(MO3.getReg());
+  else if (unsigned ImmOffs = MO2.getImm())
+    O << ", #" << ImmOffs * Scale;
+  O << "]";
+}
+
+void ARMInstPrinter::printThumbAddrModeS1Operand(const MCInst *MI, unsigned Op)
+{
+  printThumbAddrModeRI5Operand(MI, Op, 1);
+}
+
+void ARMInstPrinter::printThumbAddrModeS2Operand(const MCInst *MI, unsigned Op)
+{
+  printThumbAddrModeRI5Operand(MI, Op, 2);
+}
+
+void ARMInstPrinter::printThumbAddrModeS4Operand(const MCInst *MI, unsigned Op)
+{
+  printThumbAddrModeRI5Operand(MI, Op, 4);
+}
+
+void ARMInstPrinter::printThumbAddrModeSPOperand(const MCInst *MI,unsigned Op) {
+  const MCOperand &MO1 = MI->getOperand(Op);
+  const MCOperand &MO2 = MI->getOperand(Op+1);
+  O << "[" << getRegisterName(MO1.getReg());
+  if (unsigned ImmOffs = MO2.getImm())
+    O << ", #" << ImmOffs*4;
+  O << "]";
+}
+
+void ARMInstPrinter::printTBAddrMode(const MCInst *MI, unsigned OpNum) {
+  O << "[pc, " << getRegisterName(MI->getOperand(OpNum).getReg());
+  if (MI->getOpcode() == ARM::t2TBH)
+    O << ", lsl #1";
+  O << ']';
+}
+
+// Constant shifts t2_so_reg is a 2-operand unit corresponding to the Thumb2
+// register with shift forms.
+// REG 0   0           - e.g. R5
+// REG IMM, SH_OPC     - e.g. R5, LSL #3
+void ARMInstPrinter::printT2SOOperand(const MCInst *MI, unsigned OpNum) {
+  const MCOperand &MO1 = MI->getOperand(OpNum);
+  const MCOperand &MO2 = MI->getOperand(OpNum+1);
+
+  unsigned Reg = MO1.getReg();
+  O << getRegisterName(Reg);
+
+  // Print the shift opc.
+  O << ", "
+    << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO2.getImm()))
+    << " ";
+
+  assert(MO2.isImm() && "Not a valid t2_so_reg value!");
+  O << "#" << ARM_AM::getSORegOffset(MO2.getImm());
+}
+
+void ARMInstPrinter::printT2AddrModeImm12Operand(const MCInst *MI,
+                                                 unsigned OpNum) {
+  const MCOperand &MO1 = MI->getOperand(OpNum);
+  const MCOperand &MO2 = MI->getOperand(OpNum+1);
+
+  O << "[" << getRegisterName(MO1.getReg());
+
+  unsigned OffImm = MO2.getImm();
+  if (OffImm)  // Don't print +0.
+    O << ", #" << OffImm;
+  O << "]";
+}
+
+void ARMInstPrinter::printT2AddrModeImm8Operand(const MCInst *MI,
+                                                unsigned OpNum) {
+  const MCOperand &MO1 = MI->getOperand(OpNum);
+  const MCOperand &MO2 = MI->getOperand(OpNum+1);
+
+  O << "[" << getRegisterName(MO1.getReg());
+
+  int32_t OffImm = (int32_t)MO2.getImm();
+  // Don't print +0.
+  if (OffImm < 0)
+    O << ", #-" << -OffImm;
+  else if (OffImm > 0)
+    O << ", #" << OffImm;
+  O << "]";
+}
+
+void ARMInstPrinter::printT2AddrModeImm8s4Operand(const MCInst *MI,
+                                                 unsigned OpNum) {
+  const MCOperand &MO1 = MI->getOperand(OpNum);
+  const MCOperand &MO2 = MI->getOperand(OpNum+1);
+
+  O << "[" << getRegisterName(MO1.getReg());
+
+  int32_t OffImm = (int32_t)MO2.getImm() / 4;
+  // Don't print +0.
+  if (OffImm < 0)
+    O << ", #-" << -OffImm * 4;
+  else if (OffImm > 0)
+    O << ", #" << OffImm * 4;
+  O << "]";
+}
+
+void ARMInstPrinter::printT2AddrModeImm8OffsetOperand(const MCInst *MI,
+                                                     unsigned OpNum) {
+  const MCOperand &MO1 = MI->getOperand(OpNum);
+  int32_t OffImm = (int32_t)MO1.getImm();
+  // Don't print +0.
+  if (OffImm < 0)
+    O << "#-" << -OffImm;
+  else if (OffImm > 0)
+    O << "#" << OffImm;
+}
+
+void ARMInstPrinter::printT2AddrModeImm8s4OffsetOperand(const MCInst *MI,
+                                                        unsigned OpNum) {
+  const MCOperand &MO1 = MI->getOperand(OpNum);
+  int32_t OffImm = (int32_t)MO1.getImm() / 4;
+  // Don't print +0.
+  if (OffImm < 0)
+    O << "#-" << -OffImm * 4;
+  else if (OffImm > 0)
+    O << "#" << OffImm * 4;
+}
+
+void ARMInstPrinter::printT2AddrModeSoRegOperand(const MCInst *MI,
+                                                 unsigned OpNum) {
+  const MCOperand &MO1 = MI->getOperand(OpNum);
+  const MCOperand &MO2 = MI->getOperand(OpNum+1);
+  const MCOperand &MO3 = MI->getOperand(OpNum+2);
+
+  O << "[" << getRegisterName(MO1.getReg());
+
+  assert(MO2.getReg() && "Invalid so_reg load / store address!");
+  O << ", " << getRegisterName(MO2.getReg());
+
+  unsigned ShAmt = MO3.getImm();
+  if (ShAmt) {
+    assert(ShAmt <= 3 && "Not a valid Thumb2 addressing mode!");
+    O << ", lsl #" << ShAmt;
+  }
+  O << "]";
+}
+
+void ARMInstPrinter::printVFPf32ImmOperand(const MCInst *MI, unsigned OpNum) {
+  O << '#' << MI->getOperand(OpNum).getImm();
+}
+
+void ARMInstPrinter::printVFPf64ImmOperand(const MCInst *MI, unsigned OpNum) {
+  O << '#' << MI->getOperand(OpNum).getImm();
+}
+
diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h
index 9a3cbc3..d41b5df 100644
--- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h
+++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h
@@ -48,32 +48,33 @@ public:
   void printAddrMode5Operand(const MCInst *MI, unsigned OpNum,
                              const char *Modifier = 0);
   void printAddrMode6Operand(const MCInst *MI, unsigned OpNum);
+  void printAddrMode6OffsetOperand(const MCInst *MI, unsigned OpNum);
   void printAddrModePCOperand(const MCInst *MI, unsigned OpNum,
                               const char *Modifier = 0);
 
   void printBitfieldInvMaskImmOperand(const MCInst *MI, unsigned OpNum);
 
   void printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum);
-  void printThumbITMask(const MCInst *MI, unsigned OpNum) {}
-  void printThumbAddrModeRROperand(const MCInst *MI, unsigned OpNum) {}
+  void printThumbITMask(const MCInst *MI, unsigned OpNum);
+  void printThumbAddrModeRROperand(const MCInst *MI, unsigned OpNum);
   void printThumbAddrModeRI5Operand(const MCInst *MI, unsigned OpNum,
-                                    unsigned Scale) {}
-  void printThumbAddrModeS1Operand(const MCInst *MI, unsigned OpNum) {}
-  void printThumbAddrModeS2Operand(const MCInst *MI, unsigned OpNum) {}
-  void printThumbAddrModeS4Operand(const MCInst *MI, unsigned OpNum) {}
-  void printThumbAddrModeSPOperand(const MCInst *MI, unsigned OpNum) {}
+                                    unsigned Scale);
+  void printThumbAddrModeS1Operand(const MCInst *MI, unsigned OpNum);
+  void printThumbAddrModeS2Operand(const MCInst *MI, unsigned OpNum);
+  void printThumbAddrModeS4Operand(const MCInst *MI, unsigned OpNum);
+  void printThumbAddrModeSPOperand(const MCInst *MI, unsigned OpNum);
   
-  void printT2SOOperand(const MCInst *MI, unsigned OpNum) {}
-  void printT2AddrModeImm12Operand(const MCInst *MI, unsigned OpNum) {}
-  void printT2AddrModeImm8Operand(const MCInst *MI, unsigned OpNum) {}
-  void printT2AddrModeImm8s4Operand(const MCInst *MI, unsigned OpNum) {}
-  void printT2AddrModeImm8OffsetOperand(const MCInst *MI, unsigned OpNum) {}
-  void printT2AddrModeImm8s4OffsetOperand(const MCInst *MI, unsigned OpNum) {}
-  void printT2AddrModeSoRegOperand(const MCInst *MI, unsigned OpNum) {}
+  void printT2SOOperand(const MCInst *MI, unsigned OpNum);
+  void printT2AddrModeImm12Operand(const MCInst *MI, unsigned OpNum);
+  void printT2AddrModeImm8Operand(const MCInst *MI, unsigned OpNum);
+  void printT2AddrModeImm8s4Operand(const MCInst *MI, unsigned OpNum);
+  void printT2AddrModeImm8OffsetOperand(const MCInst *MI, unsigned OpNum);
+  void printT2AddrModeImm8s4OffsetOperand(const MCInst *MI, unsigned OpNum);
+  void printT2AddrModeSoRegOperand(const MCInst *MI, unsigned OpNum);
   
-  void printCPSOptionOperand(const MCInst *MI, unsigned OpNum) {}
-  void printMSRMaskOperand(const MCInst *MI, unsigned OpNum) {}
-  void printNegZeroOperand(const MCInst *MI, unsigned OpNum) {}
+  void printCPSOptionOperand(const MCInst *MI, unsigned OpNum);
+  void printMSRMaskOperand(const MCInst *MI, unsigned OpNum);
+  void printNegZeroOperand(const MCInst *MI, unsigned OpNum);
   void printPredicateOperand(const MCInst *MI, unsigned OpNum);
   void printMandatoryPredicateOperand(const MCInst *MI, unsigned OpNum);
   void printSBitModifierOperand(const MCInst *MI, unsigned OpNum);
@@ -82,10 +83,10 @@ public:
                           const char *Modifier);
   void printJTBlockOperand(const MCInst *MI, unsigned OpNum) {}
   void printJT2BlockOperand(const MCInst *MI, unsigned OpNum) {}
-  void printTBAddrMode(const MCInst *MI, unsigned OpNum) {}
+  void printTBAddrMode(const MCInst *MI, unsigned OpNum);
   void printNoHashImmediate(const MCInst *MI, unsigned OpNum);
-  void printVFPf32ImmOperand(const MCInst *MI, int OpNum) {}
-  void printVFPf64ImmOperand(const MCInst *MI, int OpNum) {}
+  void printVFPf32ImmOperand(const MCInst *MI, unsigned OpNum);
+  void printVFPf64ImmOperand(const MCInst *MI, unsigned OpNum);
   void printHex8ImmOperand(const MCInst *MI, int OpNum) {}
   void printHex16ImmOperand(const MCInst *MI, int OpNum) {}
   void printHex32ImmOperand(const MCInst *MI, int OpNum) {}
diff --git a/lib/Target/ARM/NEONPreAllocPass.cpp b/lib/Target/ARM/NEONPreAllocPass.cpp
index d9942c8..c36fe63 100644
--- a/lib/Target/ARM/NEONPreAllocPass.cpp
+++ b/lib/Target/ARM/NEONPreAllocPass.cpp
@@ -64,16 +64,16 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
     NumRegs = 4;
     return true;
 
-  case ARM::VLD2LNq16a:
-  case ARM::VLD2LNq32a:
+  case ARM::VLD2LNq16:
+  case ARM::VLD2LNq32:
     FirstOpnd = 0;
     NumRegs = 2;
     Offset = 0;
     Stride = 2;
     return true;
 
-  case ARM::VLD2LNq16b:
-  case ARM::VLD2LNq32b:
+  case ARM::VLD2LNq16odd:
+  case ARM::VLD2LNq32odd:
     FirstOpnd = 0;
     NumRegs = 2;
     Offset = 1;
@@ -91,34 +91,34 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
     NumRegs = 3;
     return true;
 
-  case ARM::VLD3q8a:
-  case ARM::VLD3q16a:
-  case ARM::VLD3q32a:
+  case ARM::VLD3q8_UPD:
+  case ARM::VLD3q16_UPD:
+  case ARM::VLD3q32_UPD:
     FirstOpnd = 0;
     NumRegs = 3;
     Offset = 0;
     Stride = 2;
     return true;
 
-  case ARM::VLD3q8b:
-  case ARM::VLD3q16b:
-  case ARM::VLD3q32b:
+  case ARM::VLD3q8odd_UPD:
+  case ARM::VLD3q16odd_UPD:
+  case ARM::VLD3q32odd_UPD:
     FirstOpnd = 0;
     NumRegs = 3;
     Offset = 1;
     Stride = 2;
     return true;
 
-  case ARM::VLD3LNq16a:
-  case ARM::VLD3LNq32a:
+  case ARM::VLD3LNq16:
+  case ARM::VLD3LNq32:
     FirstOpnd = 0;
     NumRegs = 3;
     Offset = 0;
     Stride = 2;
     return true;
 
-  case ARM::VLD3LNq16b:
-  case ARM::VLD3LNq32b:
+  case ARM::VLD3LNq16odd:
+  case ARM::VLD3LNq32odd:
     FirstOpnd = 0;
     NumRegs = 3;
     Offset = 1;
@@ -136,34 +136,34 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
     NumRegs = 4;
     return true;
 
-  case ARM::VLD4q8a:
-  case ARM::VLD4q16a:
-  case ARM::VLD4q32a:
+  case ARM::VLD4q8_UPD:
+  case ARM::VLD4q16_UPD:
+  case ARM::VLD4q32_UPD:
     FirstOpnd = 0;
     NumRegs = 4;
     Offset = 0;
     Stride = 2;
     return true;
 
-  case ARM::VLD4q8b:
-  case ARM::VLD4q16b:
-  case ARM::VLD4q32b:
+  case ARM::VLD4q8odd_UPD:
+  case ARM::VLD4q16odd_UPD:
+  case ARM::VLD4q32odd_UPD:
     FirstOpnd = 0;
     NumRegs = 4;
     Offset = 1;
     Stride = 2;
     return true;
 
-  case ARM::VLD4LNq16a:
-  case ARM::VLD4LNq32a:
+  case ARM::VLD4LNq16:
+  case ARM::VLD4LNq32:
     FirstOpnd = 0;
     NumRegs = 4;
     Offset = 0;
     Stride = 2;
     return true;
 
-  case ARM::VLD4LNq16b:
-  case ARM::VLD4LNq32b:
+  case ARM::VLD4LNq16odd:
+  case ARM::VLD4LNq32odd:
     FirstOpnd = 0;
     NumRegs = 4;
     Offset = 1;
@@ -177,28 +177,28 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
   case ARM::VST2LNd8:
   case ARM::VST2LNd16:
   case ARM::VST2LNd32:
-    FirstOpnd = 4;
+    FirstOpnd = 2;
     NumRegs = 2;
     return true;
 
   case ARM::VST2q8:
   case ARM::VST2q16:
   case ARM::VST2q32:
-    FirstOpnd = 4;
+    FirstOpnd = 2;
     NumRegs = 4;
     return true;
 
-  case ARM::VST2LNq16a:
-  case ARM::VST2LNq32a:
-    FirstOpnd = 4;
+  case ARM::VST2LNq16:
+  case ARM::VST2LNq32:
+    FirstOpnd = 2;
     NumRegs = 2;
     Offset = 0;
     Stride = 2;
     return true;
 
-  case ARM::VST2LNq16b:
-  case ARM::VST2LNq32b:
-    FirstOpnd = 4;
+  case ARM::VST2LNq16odd:
+  case ARM::VST2LNq32odd:
+    FirstOpnd = 2;
     NumRegs = 2;
     Offset = 1;
     Stride = 2;
@@ -211,39 +211,39 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
   case ARM::VST3LNd8:
   case ARM::VST3LNd16:
   case ARM::VST3LNd32:
-    FirstOpnd = 4;
+    FirstOpnd = 2;
     NumRegs = 3;
     return true;
 
-  case ARM::VST3q8a:
-  case ARM::VST3q16a:
-  case ARM::VST3q32a:
-    FirstOpnd = 5;
+  case ARM::VST3q8_UPD:
+  case ARM::VST3q16_UPD:
+  case ARM::VST3q32_UPD:
+    FirstOpnd = 4;
     NumRegs = 3;
     Offset = 0;
     Stride = 2;
     return true;
 
-  case ARM::VST3q8b:
-  case ARM::VST3q16b:
-  case ARM::VST3q32b:
-    FirstOpnd = 5;
+  case ARM::VST3q8odd_UPD:
+  case ARM::VST3q16odd_UPD:
+  case ARM::VST3q32odd_UPD:
+    FirstOpnd = 4;
     NumRegs = 3;
     Offset = 1;
     Stride = 2;
     return true;
 
-  case ARM::VST3LNq16a:
-  case ARM::VST3LNq32a:
-    FirstOpnd = 4;
+  case ARM::VST3LNq16:
+  case ARM::VST3LNq32:
+    FirstOpnd = 2;
     NumRegs = 3;
     Offset = 0;
     Stride = 2;
     return true;
 
-  case ARM::VST3LNq16b:
-  case ARM::VST3LNq32b:
-    FirstOpnd = 4;
+  case ARM::VST3LNq16odd:
+  case ARM::VST3LNq32odd:
+    FirstOpnd = 2;
     NumRegs = 3;
     Offset = 1;
     Stride = 2;
@@ -256,39 +256,39 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
   case ARM::VST4LNd8:
   case ARM::VST4LNd16:
   case ARM::VST4LNd32:
-    FirstOpnd = 4;
+    FirstOpnd = 2;
     NumRegs = 4;
     return true;
 
-  case ARM::VST4q8a:
-  case ARM::VST4q16a:
-  case ARM::VST4q32a:
-    FirstOpnd = 5;
+  case ARM::VST4q8_UPD:
+  case ARM::VST4q16_UPD:
+  case ARM::VST4q32_UPD:
+    FirstOpnd = 4;
     NumRegs = 4;
     Offset = 0;
     Stride = 2;
     return true;
 
-  case ARM::VST4q8b:
-  case ARM::VST4q16b:
-  case ARM::VST4q32b:
-    FirstOpnd = 5;
+  case ARM::VST4q8odd_UPD:
+  case ARM::VST4q16odd_UPD:
+  case ARM::VST4q32odd_UPD:
+    FirstOpnd = 4;
     NumRegs = 4;
     Offset = 1;
     Stride = 2;
     return true;
 
-  case ARM::VST4LNq16a:
-  case ARM::VST4LNq32a:
-    FirstOpnd = 4;
+  case ARM::VST4LNq16:
+  case ARM::VST4LNq32:
+    FirstOpnd = 2;
     NumRegs = 4;
     Offset = 0;
     Stride = 2;
     return true;
 
-  case ARM::VST4LNq16b:
-  case ARM::VST4LNq32b:
-    FirstOpnd = 4;
+  case ARM::VST4LNq16odd:
+  case ARM::VST4LNq32odd:
+    FirstOpnd = 2;
     NumRegs = 4;
     Offset = 1;
     Stride = 2;
diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp
index f5ba155..f36d4ef 100644
--- a/lib/Target/ARM/Thumb2ITBlockPass.cpp
+++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp
@@ -78,14 +78,16 @@ bool Thumb2ITBlockPass::InsertITBlocks(MachineBasicBlock &MBB) {
       DebugLoc ndl = NMI->getDebugLoc();
       unsigned NPredReg = 0;
       ARMCC::CondCodes NCC = getPredicate(NMI, NPredReg);
-      if (NCC == OCC) {
-        Mask |= (1 << Pos);
-      } else if (NCC != CC)
+      if (NCC == CC || NCC == OCC)
+        Mask |= (NCC & 1) << Pos;
+      else
         break;
       --Pos;
       ++MBBI;
     }
     Mask |= (1 << Pos);
+    // Tag along (firstcond[0] << 4) with the mask.
+    Mask |= (CC & 1) << 4;
     MIB.addImm(Mask);
     Modified = true;
     ++NumITs;
diff --git a/lib/Target/Alpha/AlphaInstrFormats.td b/lib/Target/Alpha/AlphaInstrFormats.td
index 6d82875..d984556 100644
--- a/lib/Target/Alpha/AlphaInstrFormats.td
+++ b/lib/Target/Alpha/AlphaInstrFormats.td
@@ -56,16 +56,16 @@ class MfcForm<bits<6> opcode, bits<16> fc, string asmstr, InstrItinClass itin>
         : InstAlpha<opcode, asmstr, itin> {    
   bits<5> Ra;
 
-  let OutOperandList = (ops GPRC:$RA);
-  let InOperandList = (ops);
+  let OutOperandList = (outs GPRC:$RA);
+  let InOperandList = (ins);
   let Inst{25-21} = Ra;
   let Inst{20-16} = 0;
   let Inst{15-0} = fc;
 }
 class MfcPForm<bits<6> opcode, bits<16> fc, string asmstr, InstrItinClass itin> 
         : InstAlpha<opcode, asmstr, itin> {    
-  let OutOperandList = (ops);
-  let InOperandList = (ops);
+  let OutOperandList = (outs);
+  let InOperandList = (ins);
   let Inst{25-21} = 0;
   let Inst{20-16} = 0;
   let Inst{15-0} = fc;
@@ -77,7 +77,7 @@ class MbrForm<bits<6> opcode, bits<2> TB, dag OL, string asmstr, InstrItinClass
   bits<5> Rb;
   bits<14> disp;
 
-  let OutOperandList = (ops);
+  let OutOperandList = (outs);
   let InOperandList = OL;
 
   let Inst{25-21} = Ra;
@@ -92,7 +92,7 @@ class MbrpForm<bits<6> opcode, bits<2> TB, dag OL, string asmstr, list<dag> patt
   bits<5> Rb;
   bits<14> disp;
 
-  let OutOperandList = (ops);
+  let OutOperandList = (outs);
   let InOperandList = OL;
 
   let Inst{25-21} = Ra;
@@ -107,7 +107,7 @@ def target : Operand<OtherVT> {}
 let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in {
 class BFormN<bits<6> opcode, dag OL, string asmstr, InstrItinClass itin>
    : InstAlpha<opcode, asmstr, itin> {
-  let OutOperandList = (ops);
+  let OutOperandList = (outs);
   let InOperandList = OL;
   bits<64> Opc; //dummy
   bits<5> Ra;
@@ -122,8 +122,8 @@ let isBranch = 1, isTerminator = 1 in
 class BFormD<bits<6> opcode, string asmstr, list<dag> pattern, InstrItinClass itin> 
     : InstAlpha<opcode, asmstr, itin> {
   let Pattern = pattern;
-  let OutOperandList = (ops);
-  let InOperandList = (ops target:$DISP);
+  let OutOperandList = (outs);
+  let InOperandList = (ins target:$DISP);
   bits<5> Ra;
   bits<21> disp;
 
@@ -250,7 +250,7 @@ class FPForm<bits<6> opcode, bits<11> fun, string asmstr, list<dag> pattern, Ins
 //3.3.5
 class PALForm<bits<6> opcode, dag OL, string asmstr, InstrItinClass itin>
     : InstAlpha<opcode, asmstr, itin> {
-  let OutOperandList = (ops);
+  let OutOperandList = (outs);
   let InOperandList = OL;
   bits<26> Function;
 
diff --git a/lib/Target/Alpha/AlphaInstrInfo.td b/lib/Target/Alpha/AlphaInstrInfo.td
index 91e58ce..d5d5e02 100644
--- a/lib/Target/Alpha/AlphaInstrInfo.td
+++ b/lib/Target/Alpha/AlphaInstrInfo.td
@@ -392,12 +392,12 @@ def : Pat<(setune GPRC:$X, immUExt8:$Y), (CMPEQi (CMPEQ GPRC:$X, immUExt8:$Y), 0
 
 
 let isReturn = 1, isTerminator = 1, isBarrier = 1, Ra = 31, Rb = 26, disp = 1, Uses = [R26] in {
-  def RETDAG : MbrForm< 0x1A, 0x02, (ops), "ret $$31,($$26),1", s_jsr>; //Return from subroutine
-  def RETDAGp : MbrpForm< 0x1A, 0x02, (ops), "ret $$31,($$26),1", [(retflag)], s_jsr>; //Return from subroutine
+  def RETDAG : MbrForm< 0x1A, 0x02, (ins), "ret $$31,($$26),1", s_jsr>; //Return from subroutine
+  def RETDAGp : MbrpForm< 0x1A, 0x02, (ins), "ret $$31,($$26),1", [(retflag)], s_jsr>; //Return from subroutine
 }
 
 let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1, Ra = 31, disp = 0 in
-def JMP : MbrpForm< 0x1A, 0x00, (ops GPRC:$RS), "jmp $$31,($RS),0", 
+def JMP : MbrpForm< 0x1A, 0x00, (ins GPRC:$RS), "jmp $$31,($RS),0", 
           [(brind GPRC:$RS)], s_jsr>; //Jump
 
 let isCall = 1, Ra = 26,
@@ -414,18 +414,18 @@ let isCall = 1, Ra = 26, Rb = 27, disp = 0,
             F0, F1,
             F10, F11, F12, F13, F14, F15, F16, F17, F18, F19,
             F20, F21, F22, F23, F24, F25, F26, F27, F28, F29, F30], Uses = [R27, R29] in {
-    def JSR : MbrForm< 0x1A, 0x01, (ops ), "jsr $$26,($$27),0", s_jsr>; //Jump to subroutine
+    def JSR : MbrForm< 0x1A, 0x01, (ins), "jsr $$26,($$27),0", s_jsr>; //Jump to subroutine
 }
 
 let isCall = 1, Ra = 23, Rb = 27, disp = 0,
     Defs = [R23, R24, R25, R27, R28], Uses = [R24, R25, R27] in
-  def JSRs : MbrForm< 0x1A, 0x01, (ops ), "jsr $$23,($$27),0", s_jsr>; //Jump to div or rem
+  def JSRs : MbrForm< 0x1A, 0x01, (ins), "jsr $$23,($$27),0", s_jsr>; //Jump to div or rem
 
 
-def JSR_COROUTINE : MbrForm< 0x1A, 0x03, (ops GPRC:$RD, GPRC:$RS, s14imm:$DISP), "jsr_coroutine $RD,($RS),$DISP", s_jsr>; //Jump to subroutine return
+def JSR_COROUTINE : MbrForm< 0x1A, 0x03, (ins GPRC:$RD, GPRC:$RS, s14imm:$DISP), "jsr_coroutine $RD,($RS),$DISP", s_jsr>; //Jump to subroutine return
 
 
-let OutOperandList = (ops GPRC:$RA), InOperandList = (ops s64imm:$DISP, GPRC:$RB) in {
+let OutOperandList = (outs GPRC:$RA), InOperandList = (ins s64imm:$DISP, GPRC:$RB) in {
 def LDQ   : MForm<0x29, 1, "ldq $RA,$DISP($RB)",
                  [(set GPRC:$RA, (load (add GPRC:$RB, immSExt16:$DISP)))], s_ild>;
 def LDQr  : MForm<0x29, 1, "ldq $RA,$DISP($RB)\t\t!gprellow",
@@ -445,7 +445,7 @@ def LDWUr : MForm<0x0C, 1, "ldwu $RA,$DISP($RB)\t\t!gprellow",
 }
 
 
-let OutOperandList = (ops), InOperandList = (ops GPRC:$RA, s64imm:$DISP, GPRC:$RB) in {
+let OutOperandList = (outs), InOperandList = (ins GPRC:$RA, s64imm:$DISP, GPRC:$RB) in {
 def STB   : MForm<0x0E, 0, "stb $RA,$DISP($RB)",
                  [(truncstorei8 GPRC:$RA, (add GPRC:$RB, immSExt16:$DISP))], s_ist>;
 def STBr  : MForm<0x0E, 0, "stb $RA,$DISP($RB)\t\t!gprellow",
@@ -465,7 +465,7 @@ def STQr  : MForm<0x2D, 0, "stq $RA,$DISP($RB)\t\t!gprellow",
 }
 
 //Load address
-let OutOperandList = (ops GPRC:$RA), InOperandList = (ops s64imm:$DISP, GPRC:$RB) in {
+let OutOperandList = (outs GPRC:$RA), InOperandList = (ins s64imm:$DISP, GPRC:$RB) in {
 def LDA   : MForm<0x08, 0, "lda $RA,$DISP($RB)",
                  [(set GPRC:$RA, (add GPRC:$RB, immSExt16:$DISP))], s_lda>;
 def LDAr  : MForm<0x08, 0, "lda $RA,$DISP($RB)\t\t!gprellow",
@@ -476,25 +476,25 @@ def LDAHr : MForm<0x09, 0, "ldah $RA,$DISP($RB)\t\t!gprelhigh",
                  [(set GPRC:$RA, (Alpha_gprelhi tglobaladdr:$DISP, GPRC:$RB))], s_lda>;  //Load address high
 }
 
-let OutOperandList = (ops), InOperandList = (ops F4RC:$RA, s64imm:$DISP, GPRC:$RB) in {
+let OutOperandList = (outs), InOperandList = (ins F4RC:$RA, s64imm:$DISP, GPRC:$RB) in {
 def STS  : MForm<0x26, 0, "sts $RA,$DISP($RB)",
                 [(store F4RC:$RA, (add GPRC:$RB, immSExt16:$DISP))], s_fst>;
 def STSr : MForm<0x26, 0, "sts $RA,$DISP($RB)\t\t!gprellow",
                 [(store F4RC:$RA, (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB))], s_fst>;
 }
-let OutOperandList = (ops F4RC:$RA), InOperandList = (ops s64imm:$DISP, GPRC:$RB) in {
+let OutOperandList = (outs F4RC:$RA), InOperandList = (ins s64imm:$DISP, GPRC:$RB) in {
 def LDS  : MForm<0x22, 1, "lds $RA,$DISP($RB)",
                 [(set F4RC:$RA, (load (add GPRC:$RB, immSExt16:$DISP)))], s_fld>;
 def LDSr : MForm<0x22, 1, "lds $RA,$DISP($RB)\t\t!gprellow",
                 [(set F4RC:$RA, (load (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB)))], s_fld>;
 }
-let OutOperandList = (ops), InOperandList = (ops F8RC:$RA, s64imm:$DISP, GPRC:$RB) in {
+let OutOperandList = (outs), InOperandList = (ins F8RC:$RA, s64imm:$DISP, GPRC:$RB) in {
 def STT  : MForm<0x27, 0, "stt $RA,$DISP($RB)",
                  [(store F8RC:$RA, (add GPRC:$RB, immSExt16:$DISP))], s_fst>;
 def STTr : MForm<0x27, 0, "stt $RA,$DISP($RB)\t\t!gprellow",
                  [(store F8RC:$RA, (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB))], s_fst>;
 }
-let OutOperandList = (ops F8RC:$RA), InOperandList = (ops s64imm:$DISP, GPRC:$RB) in {
+let OutOperandList = (outs F8RC:$RA), InOperandList = (ins s64imm:$DISP, GPRC:$RB) in {
 def LDT  : MForm<0x23, 1, "ldt $RA,$DISP($RB)",
                 [(set F8RC:$RA, (load (add GPRC:$RB, immSExt16:$DISP)))], s_fld>;
 def LDTr : MForm<0x23, 1, "ldt $RA,$DISP($RB)\t\t!gprellow",
@@ -570,15 +570,15 @@ def : Pat<(truncstorei8 GPRC:$DATA, GPRC:$addr),
 
 
 //load address, rellocated gpdist form
-let OutOperandList = (ops GPRC:$RA),
-    InOperandList = (ops s16imm:$DISP, GPRC:$RB, s16imm:$NUM),
+let OutOperandList = (outs GPRC:$RA),
+    InOperandList = (ins s16imm:$DISP, GPRC:$RB, s16imm:$NUM),
     mayLoad = 1 in {
 def LDAg  : MForm<0x08, 1, "lda $RA,0($RB)\t\t!gpdisp!$NUM", [], s_lda>;  //Load address
 def LDAHg : MForm<0x09, 1, "ldah $RA,0($RB)\t\t!gpdisp!$NUM", [], s_lda>;  //Load address
 }
 
 //Load quad, rellocated literal form
-let OutOperandList = (ops GPRC:$RA), InOperandList = (ops s64imm:$DISP, GPRC:$RB) in 
+let OutOperandList = (outs GPRC:$RA), InOperandList = (ins s64imm:$DISP, GPRC:$RB) in 
 def LDQl : MForm<0x29, 1, "ldq $RA,$DISP($RB)\t\t!literal",
                  [(set GPRC:$RA, (Alpha_rellit tglobaladdr:$DISP, GPRC:$RB))], s_ild>;
 def : Pat<(Alpha_rellit texternalsym:$ext, GPRC:$RB),
@@ -591,8 +591,8 @@ let OutOperandList = (outs GPRC:$RR),
 def STQ_C : MForm<0x2F, 0, "stq_l $RA,$DISP($RB)", [], s_ist>;
 def STL_C : MForm<0x2E, 0, "stl_l $RA,$DISP($RB)", [], s_ist>;
 }
-let OutOperandList = (ops GPRC:$RA),
-    InOperandList = (ops s64imm:$DISP, GPRC:$RB),
+let OutOperandList = (outs GPRC:$RA),
+    InOperandList = (ins s64imm:$DISP, GPRC:$RB),
     mayLoad = 1 in {
 def LDQ_L : MForm<0x2B, 1, "ldq_l $RA,$DISP($RB)", [], s_ild>;
 def LDL_L : MForm<0x2A, 1, "ldl_l $RA,$DISP($RB)", [], s_ild>;
@@ -611,11 +611,11 @@ def : Pat<(membarrier (i64 imm), (i64 imm), (i64 imm), (i64 imm), (i64 imm)),
 
 //Floats
 
-let OutOperandList = (ops F4RC:$RC), InOperandList = (ops F4RC:$RB), Fa = 31 in 
+let OutOperandList = (outs F4RC:$RC), InOperandList = (ins F4RC:$RB), Fa = 31 in 
 def SQRTS : FPForm<0x14, 0x58B, "sqrts/su $RB,$RC",
                    [(set F4RC:$RC, (fsqrt F4RC:$RB))], s_fsqrts>;
 
-let OutOperandList = (ops F4RC:$RC), InOperandList = (ops F4RC:$RA, F4RC:$RB) in {
+let OutOperandList = (outs F4RC:$RC), InOperandList = (ins F4RC:$RA, F4RC:$RB) in {
 def ADDS  : FPForm<0x16, 0x580, "adds/su $RA,$RB,$RC",
                    [(set F4RC:$RC, (fadd F4RC:$RA, F4RC:$RB))], s_fadd>;
 def SUBS  : FPForm<0x16, 0x581, "subs/su $RA,$RB,$RC",
@@ -634,11 +634,11 @@ def CPYSNS : FPForm<0x17, 0x021, "cpysn $RA,$RB,$RC",
 
 //Doubles
 
-let OutOperandList = (ops F8RC:$RC), InOperandList = (ops F8RC:$RB), Fa = 31 in 
+let OutOperandList = (outs F8RC:$RC), InOperandList = (ins F8RC:$RB), Fa = 31 in 
 def SQRTT : FPForm<0x14, 0x5AB, "sqrtt/su $RB,$RC",
                    [(set F8RC:$RC, (fsqrt F8RC:$RB))], s_fsqrtt>;
 
-let OutOperandList = (ops F8RC:$RC), InOperandList = (ops F8RC:$RA, F8RC:$RB) in {
+let OutOperandList = (outs F8RC:$RC), InOperandList = (ins F8RC:$RA, F8RC:$RB) in {
 def ADDT  : FPForm<0x16, 0x5A0, "addt/su $RA,$RB,$RC",
                    [(set F8RC:$RC, (fadd F8RC:$RA, F8RC:$RB))], s_fadd>;
 def SUBT  : FPForm<0x16, 0x5A1, "subt/su $RA,$RB,$RC",
@@ -665,13 +665,13 @@ def CMPTUN : FPForm<0x16, 0x5A4, "cmptun/su $RA,$RB,$RC", [], s_fadd>;
 }
 
 //More CPYS forms:
-let OutOperandList = (ops F8RC:$RC), InOperandList = (ops F4RC:$RA, F8RC:$RB) in {
+let OutOperandList = (outs F8RC:$RC), InOperandList = (ins F4RC:$RA, F8RC:$RB) in {
 def CPYSTs  : FPForm<0x17, 0x020, "cpys $RA,$RB,$RC",
                    [(set F8RC:$RC, (fcopysign F8RC:$RB, F4RC:$RA))], s_fadd>;
 def CPYSNTs : FPForm<0x17, 0x021, "cpysn $RA,$RB,$RC",
                    [(set F8RC:$RC, (fneg (fcopysign F8RC:$RB, F4RC:$RA)))], s_fadd>;
 }
-let OutOperandList = (ops F4RC:$RC), InOperandList = (ops F8RC:$RA, F4RC:$RB) in {
+let OutOperandList = (outs F4RC:$RC), InOperandList = (ins F8RC:$RA, F4RC:$RB) in {
 def CPYSSt  : FPForm<0x17, 0x020, "cpys $RA,$RB,$RC",
                    [(set F4RC:$RC, (fcopysign F4RC:$RB, F8RC:$RA))], s_fadd>;
 def CPYSESt : FPForm<0x17, 0x022, "cpyse $RA,$RB,$RC",[], s_fadd>; //Copy sign and exponent
@@ -680,7 +680,7 @@ def CPYSNSt : FPForm<0x17, 0x021, "cpysn $RA,$RB,$RC",
 }
 
 //conditional moves, floats
-let OutOperandList = (ops F4RC:$RDEST), InOperandList = (ops F4RC:$RFALSE, F4RC:$RTRUE, F8RC:$RCOND),
+let OutOperandList = (outs F4RC:$RDEST), InOperandList = (ins F4RC:$RFALSE, F4RC:$RTRUE, F8RC:$RCOND),
     isTwoAddress = 1 in {
 def FCMOVEQS : FPForm<0x17, 0x02A, "fcmoveq $RCOND,$RTRUE,$RDEST",[], s_fcmov>; //FCMOVE if = zero
 def FCMOVGES : FPForm<0x17, 0x02D, "fcmovge $RCOND,$RTRUE,$RDEST",[], s_fcmov>; //FCMOVE if >= zero
@@ -690,7 +690,7 @@ def FCMOVLTS : FPForm<0x17, 0x02C, "fcmovlt $RCOND,$RTRUE,$RDEST",[], s_fcmov>;
 def FCMOVNES : FPForm<0x17, 0x02B, "fcmovne $RCOND,$RTRUE,$RDEST",[], s_fcmov>; //FCMOVE if != zero
 }
 //conditional moves, doubles
-let OutOperandList = (ops F8RC:$RDEST), InOperandList = (ops F8RC:$RFALSE, F8RC:$RTRUE, F8RC:$RCOND),
+let OutOperandList = (outs F8RC:$RDEST), InOperandList = (ins F8RC:$RFALSE, F8RC:$RTRUE, F8RC:$RCOND),
     isTwoAddress = 1 in {
 def FCMOVEQT : FPForm<0x17, 0x02A, "fcmoveq $RCOND,$RTRUE,$RDEST", [], s_fcmov>;
 def FCMOVGET : FPForm<0x17, 0x02D, "fcmovge $RCOND,$RTRUE,$RDEST", [], s_fcmov>;
@@ -790,33 +790,33 @@ def : Pat<(select (setule F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
 
 
 
-let OutOperandList = (ops GPRC:$RC), InOperandList = (ops F4RC:$RA), Fb = 31 in 
+let OutOperandList = (outs GPRC:$RC), InOperandList = (ins F4RC:$RA), Fb = 31 in 
 def FTOIS : FPForm<0x1C, 0x078, "ftois $RA,$RC",
         [(set GPRC:$RC, (bitconvert F4RC:$RA))], s_ftoi>; //Floating to integer move, S_floating
-let OutOperandList = (ops GPRC:$RC), InOperandList = (ops F8RC:$RA), Fb = 31 in 
+let OutOperandList = (outs GPRC:$RC), InOperandList = (ins F8RC:$RA), Fb = 31 in 
 def FTOIT : FPForm<0x1C, 0x070, "ftoit $RA,$RC",
         [(set GPRC:$RC, (bitconvert F8RC:$RA))], s_ftoi>; //Floating to integer move
-let OutOperandList = (ops F4RC:$RC), InOperandList = (ops GPRC:$RA), Fb = 31 in 
+let OutOperandList = (outs F4RC:$RC), InOperandList = (ins GPRC:$RA), Fb = 31 in 
 def ITOFS : FPForm<0x14, 0x004, "itofs $RA,$RC",
     	[(set F4RC:$RC, (bitconvert GPRC:$RA))], s_itof>; //Integer to floating move, S_floating
-let OutOperandList = (ops F8RC:$RC), InOperandList = (ops GPRC:$RA), Fb = 31 in 
+let OutOperandList = (outs F8RC:$RC), InOperandList = (ins GPRC:$RA), Fb = 31 in 
 def ITOFT : FPForm<0x14, 0x024, "itoft $RA,$RC",
         [(set F8RC:$RC, (bitconvert GPRC:$RA))], s_itof>; //Integer to floating move
 
 
-let OutOperandList = (ops F4RC:$RC), InOperandList = (ops F8RC:$RB), Fa = 31 in 
+let OutOperandList = (outs F4RC:$RC), InOperandList = (ins F8RC:$RB), Fa = 31 in 
 def CVTQS : FPForm<0x16, 0x7BC, "cvtqs/sui $RB,$RC",
         [(set F4RC:$RC, (Alpha_cvtqs F8RC:$RB))], s_fadd>;
-let OutOperandList = (ops F8RC:$RC), InOperandList = (ops F8RC:$RB), Fa = 31 in 
+let OutOperandList = (outs F8RC:$RC), InOperandList = (ins F8RC:$RB), Fa = 31 in 
 def CVTQT : FPForm<0x16, 0x7BE, "cvtqt/sui $RB,$RC",
         [(set F8RC:$RC, (Alpha_cvtqt F8RC:$RB))], s_fadd>;
-let OutOperandList = (ops F8RC:$RC), InOperandList = (ops F8RC:$RB), Fa = 31 in 
+let OutOperandList = (outs F8RC:$RC), InOperandList = (ins F8RC:$RB), Fa = 31 in 
 def CVTTQ : FPForm<0x16, 0x52F, "cvttq/svc $RB,$RC",
         [(set F8RC:$RC, (Alpha_cvttq F8RC:$RB))], s_fadd>;
-let OutOperandList = (ops F8RC:$RC), InOperandList = (ops F4RC:$RB), Fa = 31 in 
+let OutOperandList = (outs F8RC:$RC), InOperandList = (ins F4RC:$RB), Fa = 31 in 
 def CVTST : FPForm<0x16, 0x6AC, "cvtst/s $RB,$RC",
                    [(set F8RC:$RC, (fextend F4RC:$RB))], s_fadd>;
-let OutOperandList = (ops F4RC:$RC), InOperandList = (ops F8RC:$RB), Fa = 31 in 
+let OutOperandList = (outs F4RC:$RC), InOperandList = (ins F8RC:$RB), Fa = 31 in 
 def CVTTS : FPForm<0x16, 0x7AC, "cvtts/sui $RB,$RC",
                    [(set F4RC:$RC, (fround F8RC:$RB))], s_fadd>;
 
@@ -829,20 +829,20 @@ def :  Pat<(select GPRC:$RC, F4RC:$st, F4RC:$sf),
 //Branching
 /////////////////////////////////////////////////////////
 class br_icc<bits<6> opc, string asmstr>
-  : BFormN<opc, (ops u64imm:$opc, GPRC:$R, target:$dst), 
+  : BFormN<opc, (ins u64imm:$opc, GPRC:$R, target:$dst), 
     !strconcat(asmstr, " $R,$dst"),  s_icbr>;
 class br_fcc<bits<6> opc, string asmstr>
-  : BFormN<opc, (ops u64imm:$opc, F8RC:$R, target:$dst), 
+  : BFormN<opc, (ins u64imm:$opc, F8RC:$R, target:$dst), 
     !strconcat(asmstr, " $R,$dst"),  s_fbr>;
 
 let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in {
 let Ra = 31 in
 def BR : BFormD<0x30, "br $$31,$DISP", [(br bb:$DISP)], s_ubr>;
 
-def COND_BRANCH_I : BFormN<0, (ops u64imm:$opc, GPRC:$R, target:$dst), 
+def COND_BRANCH_I : BFormN<0, (ins u64imm:$opc, GPRC:$R, target:$dst), 
                     "{:comment} COND_BRANCH imm:$opc, GPRC:$R, bb:$dst", 
                     s_icbr>;
-def COND_BRANCH_F : BFormN<0, (ops u64imm:$opc, F8RC:$R, target:$dst), 
+def COND_BRANCH_F : BFormN<0, (ins u64imm:$opc, F8RC:$R, target:$dst), 
                     "{:comment} COND_BRANCH imm:$opc, F8RC:$R, bb:$dst",
                     s_fbr>;
 //Branches, int
diff --git a/lib/Target/Blackfin/BlackfinInstrInfo.td b/lib/Target/Blackfin/BlackfinInstrInfo.td
index 88ff85f..e3c3993 100644
--- a/lib/Target/Blackfin/BlackfinInstrInfo.td
+++ b/lib/Target/Blackfin/BlackfinInstrInfo.td
@@ -29,7 +29,8 @@ def BfinCallseqEnd   : SDNode<"ISD::CALLSEQ_END",   SDT_BfinCallSeqEnd,
 
 def SDT_BfinCall  : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
 def BfinCall      : SDNode<"BFISD::CALL", SDT_BfinCall,
-                           [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+                           [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag,
+                            SDNPVariadic]>;
 
 def BfinRet: SDNode<"BFISD::RET_FLAG", SDTNone,
                     [SDNPHasChain, SDNPOptInFlag]>;
@@ -610,7 +611,7 @@ def MOVE_ncccc : F1<(outs NotCC:$cc), (ins JustCC:$sb),
 
 def MOVECC_zext : F1<(outs D:$dst), (ins JustCC:$cc),
                       "$dst = $cc;",
-                      [(set D:$dst, (zext JustCC:$cc))]>;
+                     [/*(set D:$dst, (zext JustCC:$cc))*/]>;
 
 def MOVENCC_z : F1<(outs D:$dst), (ins NotCC:$cc),
                    "$dst = cc;", []>;
@@ -859,10 +860,10 @@ def : Pat<(BfinCall (i32 tglobaladdr:$dst)),
 def : Pat<(BfinCall (i32 texternalsym:$dst)),
           (CALLa texternalsym:$dst)>;
 
-def : Pat<(sext JustCC:$cc),
-          (NEG (MOVECC_zext JustCC:$cc))>;
-def : Pat<(anyext JustCC:$cc),
-          (MOVECC_zext JustCC:$cc)>;
+//def : Pat<(sext JustCC:$cc),
+//          (NEG (MOVECC_zext JustCC:$cc))>;
+//def : Pat<(anyext JustCC:$cc),
+//          (MOVECC_zext JustCC:$cc)>;
 def : Pat<(i16 (zext JustCC:$cc)),
           (EXTRACT_SUBREG (MOVECC_zext JustCC:$cc), bfin_subreg_lo16)>;
 def : Pat<(i16 (sext JustCC:$cc)),
diff --git a/lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp b/lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp
index ea9480d..34a8d38 100644
--- a/lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp
+++ b/lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp
@@ -53,6 +53,10 @@ std::string BlackfinIntrinsicInfo::getName(unsigned IntrID, const Type **Tys,
 
 unsigned
 BlackfinIntrinsicInfo::lookupName(const char *Name, unsigned Len) const {
+  if (Len < 5 || Name[4] != '.' || Name[0] != 'l' || Name[1] != 'l'
+      || Name[2] != 'v' || Name[3] != 'm')
+    return 0;  // All intrinsics start with 'llvm.'
+
 #define GET_FUNCTION_RECOGNIZER
 #include "BlackfinGenIntrinsics.inc"
 #undef GET_FUNCTION_RECOGNIZER
diff --git a/lib/Target/CBackend/CBackend.cpp b/lib/Target/CBackend/CBackend.cpp
index b1ba0d2..0c265ad 100644
--- a/lib/Target/CBackend/CBackend.cpp
+++ b/lib/Target/CBackend/CBackend.cpp
@@ -111,7 +111,8 @@ namespace {
     static char ID;
     explicit CWriter(formatted_raw_ostream &o)
       : FunctionPass(&ID), Out(o), IL(0), Mang(0), LI(0), 
-        TheModule(0), TAsm(0), TD(0), OpaqueCounter(0), NextAnonValueNumber(0) {
+        TheModule(0), TAsm(0), TCtx(0), TD(0), OpaqueCounter(0),
+        NextAnonValueNumber(0) {
       FPCounter = 0;
     }
 
@@ -147,6 +148,8 @@ namespace {
       delete IL;
       delete TD;
       delete Mang;
+      delete TCtx;
+      delete TAsm;
       FPConstantMap.clear();
       TypeNames.clear();
       ByValParams.clear();
diff --git a/lib/Target/CellSPU/SPUInstrInfo.td b/lib/Target/CellSPU/SPUInstrInfo.td
index ad12604..5068f77 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.td
+++ b/lib/Target/CellSPU/SPUInstrInfo.td
@@ -1133,16 +1133,14 @@ class XSBHInst<dag OOL, dag IOL, list<dag> pattern>:
       "xsbh\t$rDst, $rSrc",
       IntegerOp, pattern>;
 
-class XSBHVecInst<ValueType vectype>:
-    XSBHInst<(outs VECREG:$rDst), (ins VECREG:$rSrc),
-      [(set (v8i16 VECREG:$rDst), (sext (vectype VECREG:$rSrc)))]>;
-
 class XSBHInRegInst<RegisterClass rclass, list<dag> pattern>:
     XSBHInst<(outs rclass:$rDst), (ins rclass:$rSrc),
              pattern>;
 
 multiclass ExtendByteHalfword {
-  def v16i8:     XSBHVecInst<v8i16>;
+  def v16i8:     XSBHInst<(outs VECREG:$rDst), (ins VECREG:$rSrc),
+                          [
+                  /*(set (v8i16 VECREG:$rDst), (sext (v8i16 VECREG:$rSrc)))*/]>;
   def r8:        XSBHInst<(outs R16C:$rDst), (ins R8C:$rSrc),
                           [(set R16C:$rDst, (sext R8C:$rSrc))]>;
   def r16:       XSBHInRegInst<R16C,
@@ -1200,8 +1198,8 @@ class XSWDInst<dag OOL, dag IOL, list<dag> pattern>:
       
 class XSWDVecInst<ValueType in_vectype, ValueType out_vectype>:
     XSWDInst<(outs VECREG:$rDst), (ins VECREG:$rSrc),
-             [(set (out_vectype VECREG:$rDst),
-                   (sext (out_vectype VECREG:$rSrc)))]>;
+             [/*(set (out_vectype VECREG:$rDst),
+                   (sext (out_vectype VECREG:$rSrc)))*/]>;
       
 class XSWDRegInst<RegisterClass in_rclass, RegisterClass out_rclass>:
     XSWDInst<(outs out_rclass:$rDst), (ins in_rclass:$rSrc),
@@ -4146,7 +4144,7 @@ def CFSif32 :
 def FESDvec :
     RRForm_1<0b00011101110, (outs VECREG:$rT), (ins VECREG:$rA),
       "fesd\t$rT, $rA", SPrecFP,
-      [(set (v2f64 VECREG:$rT), (fextend (v4f32 VECREG:$rA)))]>;
+      [/*(set (v2f64 VECREG:$rT), (fextend (v4f32 VECREG:$rA)))*/]>;
 
 def FESDf32 :
     RRForm_1<0b00011101110, (outs R64FP:$rT), (ins R32FP:$rA),
diff --git a/lib/Target/CellSPU/SPUNodes.td b/lib/Target/CellSPU/SPUNodes.td
index 8507861..846c7ed 100644
--- a/lib/Target/CellSPU/SPUNodes.td
+++ b/lib/Target/CellSPU/SPUNodes.td
@@ -28,7 +28,8 @@ def callseq_end   : SDNode<"ISD::CALLSEQ_END",   SDT_SPUCallSeq,
 
 def SDT_SPUCall   : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>;
 def SPUcall       : SDNode<"SPUISD::CALL", SDT_SPUCall,
-                           [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+                           [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag,
+                            SDNPVariadic]>;
 
 // Operand type constraints for vector shuffle/permute operations
 def SDT_SPUshuffle   : SDTypeProfile<1, 3, [
diff --git a/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp b/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp
index c8faffc..4931860 100644
--- a/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp
+++ b/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp
@@ -57,6 +57,10 @@ std::string MBlazeIntrinsicInfo::getName(unsigned IntrID, const Type **Tys,
 
 unsigned MBlazeIntrinsicInfo::
 lookupName(const char *Name, unsigned Len) const {
+  if (Len < 5 || Name[4] != '.' || Name[0] != 'l' || Name[1] != 'l'
+      || Name[2] != 'v' || Name[3] != 'm')
+    return 0;  // All intrinsics start with 'llvm.'
+
 #define GET_FUNCTION_RECOGNIZER
 #include "MBlazeGenIntrinsics.inc"
 #undef GET_FUNCTION_RECOGNIZER
diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td
index cef3697..2b9e941 100644
--- a/lib/Target/Mips/MipsInstrInfo.td
+++ b/lib/Target/Mips/MipsInstrInfo.td
@@ -29,7 +29,8 @@ def SDT_MipsCallSeqEnd   : SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
 
 // Call
 def MipsJmpLink : SDNode<"MipsISD::JmpLink",SDT_MipsJmpLink, 
-                         [SDNPHasChain, SDNPOutFlag, SDNPOptInFlag]>;
+                         [SDNPHasChain, SDNPOutFlag, SDNPOptInFlag,
+                          SDNPVariadic]>;
 
 // Hi and Lo nodes are used to handle global addresses. Used on 
 // MipsISelLowering to lower stuff like GlobalAddress, ExternalSymbol 
diff --git a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp
index b6eceb3..1001d29 100644
--- a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp
+++ b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp
@@ -184,7 +184,7 @@ void PIC16AsmPrinter::printOperand(const MachineInstr *MI, int opNum) {
       // by any chance, as we do not link in those as .bc lib. So these calls
       // are always external and it is safe to emit an extern.
       if (PAN::isMemIntrinsic(Sym->getName()))
-        LibcallDecls.push_back(createESName(Sym->getName()));
+        LibcallDecls.insert(Sym->getName());
 
       O << *Sym;
       break;
@@ -199,7 +199,7 @@ void PIC16AsmPrinter::printOperand(const MachineInstr *MI, int opNum) {
           Printname = PAN::Rename(Sname);
         }
         // Record these decls, we need to print them in asm as extern.
-        LibcallDecls.push_back(createESName(Printname));
+        LibcallDecls.insert(Printname);
       }
 
       O << Printname;
@@ -221,18 +221,6 @@ void PIC16AsmPrinter::printCCOperand(const MachineInstr *MI, int opNum) {
   O << PIC16CondCodeToString((PIC16CC::CondCodes)CC);
 }
 
-// This function is used to sort the decls list.
-// should return true if s1 should come before s2.
-static bool is_before(const char *s1, const char *s2) {
-  return strcmp(s1, s2) <= 0;
-}
-
-// This is used by list::unique below. 
-// unique will filter out duplicates if it knows them.
-static bool is_duplicate(const char *s1, const char *s2) {
-  return !strcmp(s1, s2);
-}
-
 /// printLibcallDecls - print the extern declarations for compiler 
 /// intrinsics.
 ///
@@ -241,12 +229,9 @@ void PIC16AsmPrinter::printLibcallDecls() {
   if (LibcallDecls.empty()) return;
 
   O << MAI->getCommentString() << "External decls for libcalls - BEGIN." <<"\n";
-  // Remove duplicate entries.
-  LibcallDecls.sort(is_before);
-  LibcallDecls.unique(is_duplicate);
 
-  for (std::list<const char*>::const_iterator I = LibcallDecls.begin(); 
-       I != LibcallDecls.end(); I++) {
+  for (std::set<std::string>::const_iterator I = LibcallDecls.begin(),
+       E = LibcallDecls.end(); I != E; I++) {
     O << MAI->getExternDirective() << *I << "\n";
   }
   O << MAI->getCommentString() << "External decls for libcalls - END." <<"\n";
diff --git a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h
index 519be4c..8063fcc 100644
--- a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h
+++ b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h
@@ -25,6 +25,7 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Target/TargetMachine.h"
 #include <list>
+#include <set>
 #include <string>
 
 namespace llvm {
@@ -80,7 +81,7 @@ namespace llvm {
     PIC16TargetLowering *PTLI;
     PIC16DbgInfo DbgInfo;
     const PIC16MCAsmInfo *PMAI;
-    std::list<const char *> LibcallDecls; // List of extern decls.
+    std::set<std::string> LibcallDecls; // Sorted & uniqued set of extern decls.
     std::vector<const GlobalVariable *> ExternalVarDecls;
     std::vector<const GlobalVariable *> ExternalVarDefs;
   };
diff --git a/lib/Target/PIC16/PIC16Section.h b/lib/Target/PIC16/PIC16Section.h
index 3a8bbfb..566f920 100644
--- a/lib/Target/PIC16/PIC16Section.h
+++ b/lib/Target/PIC16/PIC16Section.h
@@ -45,7 +45,7 @@ namespace llvm {
     
     PIC16Section(const StringRef &name, SectionKind K, const std::string &addr, 
                  int color)
-      : MCSection(K), Name(name), Address(addr), Color(color) {
+      : MCSection(K), Name(name), Address(addr), Color(color), Size(0) {
     }
     
   public:
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index 845cd8f..532a3ec 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -111,9 +111,11 @@ def callseq_end   : SDNode<"ISD::CALLSEQ_END",   SDT_PPCCallSeqEnd,
 
 def SDT_PPCCall   : SDTypeProfile<0, -1, [SDTCisInt<0>]>;
 def PPCcall_Darwin : SDNode<"PPCISD::CALL_Darwin", SDT_PPCCall,
-                            [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+                            [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag,
+                             SDNPVariadic]>;
 def PPCcall_SVR4  : SDNode<"PPCISD::CALL_SVR4", SDT_PPCCall,
-                           [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+                           [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag,
+                            SDNPVariadic]>;
 def PPCnop : SDNode<"PPCISD::NOP", SDT_PPCnop, [SDNPInFlag, SDNPOutFlag]>;
 def PPCload   : SDNode<"PPCISD::LOAD", SDTypeProfile<1, 1, []>,
                        [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
@@ -124,16 +126,18 @@ def PPCtoc_restore : SDNode<"PPCISD::TOC_RESTORE", SDTypeProfile<0, 0, []>,
 def PPCmtctr      : SDNode<"PPCISD::MTCTR", SDT_PPCCall,
                            [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
 def PPCbctrl_Darwin  : SDNode<"PPCISD::BCTRL_Darwin", SDTNone,
-                              [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+                              [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag,
+                               SDNPVariadic]>;
 
 def PPCbctrl_SVR4  : SDNode<"PPCISD::BCTRL_SVR4", SDTNone,
-                            [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+                            [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag,
+                             SDNPVariadic]>;
 
 def retflag       : SDNode<"PPCISD::RET_FLAG", SDTNone,
-                           [SDNPHasChain, SDNPOptInFlag]>;
+                           [SDNPHasChain, SDNPOptInFlag, SDNPVariadic]>;
 
 def PPCtc_return : SDNode<"PPCISD::TC_RETURN", SDT_PPCTC_ret,
-                        [SDNPHasChain,  SDNPOptInFlag]>;
+                        [SDNPHasChain,  SDNPOptInFlag, SDNPVariadic]>;
 
 def PPCvcmp       : SDNode<"PPCISD::VCMP" , SDT_PPCvcmp, []>;
 def PPCvcmp_o     : SDNode<"PPCISD::VCMPo", SDT_PPCvcmp, [SDNPOutFlag]>;
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
index cac6962..c4a7408 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -20,7 +20,7 @@
 #include "llvm/Support/FormattedStream.h"
 using namespace llvm;
 
-static const MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
+static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
   Triple TheTriple(TT);
   bool isPPC64 = TheTriple.getArch() == Triple::ppc64;
   if (TheTriple.getOS() == Triple::Darwin)
diff --git a/lib/Target/Sparc/SparcInstrInfo.td b/lib/Target/Sparc/SparcInstrInfo.td
index d88d508..9489580 100644
--- a/lib/Target/Sparc/SparcInstrInfo.td
+++ b/lib/Target/Sparc/SparcInstrInfo.td
@@ -527,11 +527,11 @@ let Uses = [O0, O1, O2, O3, O4, O5],
   def JMPLrr : F3_1<2, 0b111000,
                     (outs), (ins MEMrr:$ptr),
                     "call $ptr",
-                    [(call  ADDRrr:$ptr)]>;
+                    [(call ADDRrr:$ptr)]>;
   def JMPLri : F3_2<2, 0b111000,
                     (outs), (ins MEMri:$ptr),
                     "call $ptr",
-                    [(call  ADDRri:$ptr)]>;
+                    [(call ADDRri:$ptr)]>;
 }
 
 // Section B.28 - Read State Register Instructions
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td
index a75b85d..0d1af23 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -47,7 +47,7 @@ def SDT_Address             : SDTypeProfile<1, 1,
 def SystemZretflag : SDNode<"SystemZISD::RET_FLAG", SDTNone,
                      [SDNPHasChain, SDNPOptInFlag]>;
 def SystemZcall    : SDNode<"SystemZISD::CALL", SDT_SystemZCall,
-                     [SDNPHasChain, SDNPOutFlag, SDNPOptInFlag]>;
+                     [SDNPHasChain, SDNPOutFlag, SDNPOptInFlag, SDNPVariadic]>;
 def SystemZcallseq_start :
                  SDNode<"ISD::CALLSEQ_START", SDT_SystemZCallSeqStart,
                         [SDNPHasChain, SDNPOutFlag]>;
diff --git a/lib/Target/TargetData.cpp b/lib/Target/TargetData.cpp
index 9a16808..643b397 100644
--- a/lib/Target/TargetData.cpp
+++ b/lib/Target/TargetData.cpp
@@ -460,6 +460,15 @@ uint64_t TargetData::getTypeSizeInBits(const Type *Ty) const {
   case Type::StructTyID:
     // Get the layout annotation... which is lazily created on demand.
     return getStructLayout(cast<StructType>(Ty))->getSizeInBits();
+  case Type::UnionTyID: {
+    const UnionType *UnTy = cast<UnionType>(Ty);
+    uint64_t Size = 0;
+    for (UnionType::element_iterator i = UnTy->element_begin(),
+             e = UnTy->element_end(); i != e; ++i) {
+      Size = std::max(Size, getTypeSizeInBits(*i));
+    }
+    return Size;
+  }
   case Type::IntegerTyID:
     return cast<IntegerType>(Ty)->getBitWidth();
   case Type::VoidTyID:
@@ -516,6 +525,17 @@ unsigned char TargetData::getAlignment(const Type *Ty, bool abi_or_pref) const {
     unsigned Align = getAlignmentInfo(AGGREGATE_ALIGN, 0, abi_or_pref, Ty);
     return std::max(Align, (unsigned)Layout->getAlignment());
   }
+  case Type::UnionTyID: {
+    const UnionType *UnTy = cast<UnionType>(Ty);
+    unsigned Align = 1;
+
+    // Unions need the maximum alignment of all their entries
+    for (UnionType::element_iterator i = UnTy->element_begin(), 
+             e = UnTy->element_end(); i != e; ++i) {
+      Align = std::max(Align, (unsigned)getAlignment(*i, abi_or_pref));
+    }
+    return Align;
+  }
   case Type::IntegerTyID:
   case Type::VoidTyID:
     AlignType = INTEGER_ALIGN;
@@ -600,6 +620,11 @@ uint64_t TargetData::getIndexedOffset(const Type *ptrTy, Value* const* Indices,
 
       // Update Ty to refer to current element
       Ty = STy->getElementType(FieldNo);
+    } else if (const UnionType *UnTy = dyn_cast<UnionType>(*TI)) {
+        unsigned FieldNo = cast<ConstantInt>(Indices[CurIDX])->getZExtValue();
+
+        // Offset into union is canonically 0, but type changes
+        Ty = UnTy->getElementType(FieldNo);
     } else {
       // Update Ty to refer to current element
       Ty = cast<SequentialType>(Ty)->getElementType();
diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp
index a093e2d..44722b3 100644
--- a/lib/Target/TargetLoweringObjectFile.cpp
+++ b/lib/Target/TargetLoweringObjectFile.cpp
@@ -317,7 +317,7 @@ getExprForDwarfReference(const MCSymbol *Sym, Mangler *Mang,
   case dwarf::DW_EH_PE_pcrel: {
     // Emit a label to the streamer for the current position.  This gives us
     // .-foo addressing.
-    MCSymbol *PCSym = getContext().GetOrCreateTemporarySymbol();
+    MCSymbol *PCSym = getContext().CreateTempSymbol();
     Streamer.EmitLabel(PCSym);
     const MCExpr *PC = MCSymbolRefExpr::Create(PCSym, getContext());
     return MCBinaryExpr::CreateSub(Res, PC, getContext());
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index dde86fb..47873d1 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -29,6 +29,9 @@ struct X86Operand;
 class X86ATTAsmParser : public TargetAsmParser {
   MCAsmParser &Parser;
 
+protected:
+  unsigned Is64Bit : 1;
+
 private:
   MCAsmParser &getParser() const { return Parser; }
 
@@ -45,6 +48,8 @@ private:
 
   bool ParseDirectiveWord(unsigned Size, SMLoc L);
 
+  void InstructionCleanup(MCInst &Inst);
+
   /// @name Auto-generated Match Functions
   /// {  
 
@@ -62,7 +67,23 @@ public:
 
   virtual bool ParseDirective(AsmToken DirectiveID);
 };
-  
+ 
+class X86_32ATTAsmParser : public X86ATTAsmParser {
+public:
+  X86_32ATTAsmParser(const Target &T, MCAsmParser &_Parser)
+    : X86ATTAsmParser(T, _Parser) {
+    Is64Bit = false;
+  }
+};
+
+class X86_64ATTAsmParser : public X86ATTAsmParser {
+public:
+  X86_64ATTAsmParser(const Target &T, MCAsmParser &_Parser)
+    : X86ATTAsmParser(T, _Parser) {
+    Is64Bit = true;
+  }
+};
+
 } // end anonymous namespace
 
 /// @name Auto-generated Match Functions
@@ -548,8 +569,10 @@ ParseInstruction(const StringRef &Name, SMLoc NameLoc,
       Operands.size() == 3 &&
       static_cast<X86Operand*>(Operands[1])->isImm() &&
       isa<MCConstantExpr>(static_cast<X86Operand*>(Operands[1])->getImm()) &&
-      cast<MCConstantExpr>(static_cast<X86Operand*>(Operands[1])->getImm())->getValue() == 1)
+      cast<MCConstantExpr>(static_cast<X86Operand*>(Operands[1])->getImm())->getValue() == 1) {
+    delete Operands[1];
     Operands.erase(Operands.begin() + 1);
+  }
 
   return false;
 }
@@ -586,12 +609,30 @@ bool X86ATTAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
   return false;
 }
 
+// FIXME: Custom X86 cleanup function to implement a temporary hack to handle
+// matching INCL/DECL correctly for x86_64. This needs to be replaced by a
+// proper mechanism for supporting (ambiguous) feature dependent instructions.
+void X86ATTAsmParser::InstructionCleanup(MCInst &Inst) {
+  if (!Is64Bit) return;
+
+  switch (Inst.getOpcode()) {
+  case X86::DEC16r: Inst.setOpcode(X86::DEC64_16r); break;
+  case X86::DEC16m: Inst.setOpcode(X86::DEC64_16m); break;
+  case X86::DEC32r: Inst.setOpcode(X86::DEC64_32r); break;
+  case X86::DEC32m: Inst.setOpcode(X86::DEC64_32m); break;
+  case X86::INC16r: Inst.setOpcode(X86::INC64_16r); break;
+  case X86::INC16m: Inst.setOpcode(X86::INC64_16m); break;
+  case X86::INC32r: Inst.setOpcode(X86::INC64_32r); break;
+  case X86::INC32m: Inst.setOpcode(X86::INC64_32m); break;
+  }
+}
+
 extern "C" void LLVMInitializeX86AsmLexer();
 
 // Force static initialization.
 extern "C" void LLVMInitializeX86AsmParser() {
-  RegisterAsmParser<X86ATTAsmParser> X(TheX86_32Target);
-  RegisterAsmParser<X86ATTAsmParser> Y(TheX86_64Target);
+  RegisterAsmParser<X86_32ATTAsmParser> X(TheX86_32Target);
+  RegisterAsmParser<X86_64ATTAsmParser> Y(TheX86_64Target);
   LLVMInitializeX86AsmLexer();
 }
 
diff --git a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
index cbfc57a..7d29d97 100644
--- a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
+++ b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
@@ -427,7 +427,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
     //   MYGLOBAL + (. - PICBASE)
     // However, we can't generate a ".", so just emit a new label here and refer
     // to it.
-    MCSymbol *DotSym = OutContext.GetOrCreateTemporarySymbol();
+    MCSymbol *DotSym = OutContext.CreateTempSymbol();
     OutStreamer.EmitLabel(DotSym);
     
     // Now that we have emitted the label, lower the complex operand expression.
diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp
index a316860..7b7b5cb 100644
--- a/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -459,11 +459,11 @@ static void translateInstruction(MCInst &mcInst,
   }
 }
 
-static const MCDisassembler *createX86_32Disassembler(const Target &T) {
+static MCDisassembler *createX86_32Disassembler(const Target &T) {
   return new X86Disassembler::X86_32Disassembler;
 }
 
-static const MCDisassembler *createX86_64Disassembler(const Target &T) {
+static MCDisassembler *createX86_64Disassembler(const Target &T) {
   return new X86Disassembler::X86_64Disassembler;
 }
 
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
index a0a04ba..4f02ed4 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
@@ -1355,8 +1355,8 @@ int decodeInstruction(struct InternalInstruction* insn,
   
   insn->length = insn->readerCursor - insn->startLocation;
   
-  dbgprintf(insn, "Read from 0x%llx to 0x%llx: length %llu",
-          startLoc, insn->readerCursor, insn->length);
+  dbgprintf(insn, "Read from 0x%llx to 0x%llx: length %zu",
+            startLoc, insn->readerCursor, insn->length);
     
   if (insn->length > 15)
     dbgprintf(insn, "Instruction exceeds 15-byte limit");
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index 6a4bdb5..2be51e1 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -191,6 +191,7 @@ include "X86CallingConv.td"
 // Currently the X86 assembly parser only supports ATT syntax.
 def ATTAsmParser : AsmParser {
   string AsmParserClassName  = "ATTAsmParser";
+  string AsmParserInstCleanup  = "InstructionCleanup";
   int Variant = 0;
 
   // Discard comments in assembly strings.
diff --git a/lib/Target/X86/X86AsmBackend.cpp b/lib/Target/X86/X86AsmBackend.cpp
index a44afc6..754a200 100644
--- a/lib/Target/X86/X86AsmBackend.cpp
+++ b/lib/Target/X86/X86AsmBackend.cpp
@@ -9,39 +9,100 @@
 
 #include "llvm/Target/TargetAsmBackend.h"
 #include "X86.h"
+#include "X86FixupKinds.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCSectionELF.h"
 #include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MachObjectWriter.h"
 #include "llvm/Target/TargetRegistry.h"
 #include "llvm/Target/TargetAsmBackend.h"
 using namespace llvm;
 
 namespace {
 
+static unsigned getFixupKindLog2Size(unsigned Kind) {
+  switch (Kind) {
+  default: assert(0 && "invalid fixup kind!");
+  case X86::reloc_pcrel_1byte:
+  case FK_Data_1: return 0;
+  case FK_Data_2: return 1;
+  case X86::reloc_pcrel_4byte:
+  case X86::reloc_riprel_4byte:
+  case X86::reloc_riprel_4byte_movq_load:
+  case FK_Data_4: return 2;
+  case FK_Data_8: return 3;
+  }
+}
+
 class X86AsmBackend : public TargetAsmBackend {
 public:
   X86AsmBackend(const Target &T)
     : TargetAsmBackend(T) {}
+
+  void ApplyFixup(const MCAsmFixup &Fixup, MCDataFragment &DF,
+                  uint64_t Value) const {
+    unsigned Size = 1 << getFixupKindLog2Size(Fixup.Kind);
+
+    assert(Fixup.Offset + Size <= DF.getContents().size() &&
+           "Invalid fixup offset!");
+    for (unsigned i = 0; i != Size; ++i)
+      DF.getContents()[Fixup.Offset + i] = uint8_t(Value >> (i * 8));
+  }
+};
+
+class ELFX86AsmBackend : public X86AsmBackend {
+public:
+  ELFX86AsmBackend(const Target &T)
+    : X86AsmBackend(T) {
+    HasAbsolutizedSet = true;
+    HasScatteredSymbols = true;
+  }
+
+  MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+    return 0;
+  }
+
+  bool isVirtualSection(const MCSection &Section) const {
+    const MCSectionELF &SE = static_cast<const MCSectionELF&>(Section);
+    return SE.getType() == MCSectionELF::SHT_NOBITS;;
+  }
 };
 
 class DarwinX86AsmBackend : public X86AsmBackend {
 public:
   DarwinX86AsmBackend(const Target &T)
-    : X86AsmBackend(T) {}
-
-  virtual bool hasAbsolutizedSet() const { return true; }
+    : X86AsmBackend(T) {
+    HasAbsolutizedSet = true;
+    HasScatteredSymbols = true;
+  }
 
-  virtual bool hasScatteredSymbols() const { return true; }
+  bool isVirtualSection(const MCSection &Section) const {
+    const MCSectionMachO &SMO = static_cast<const MCSectionMachO&>(Section);
+    return (SMO.getType() == MCSectionMachO::S_ZEROFILL ||
+            SMO.getType() == MCSectionMachO::S_GB_ZEROFILL);
+  }
 };
 
 class DarwinX86_32AsmBackend : public DarwinX86AsmBackend {
 public:
   DarwinX86_32AsmBackend(const Target &T)
     : DarwinX86AsmBackend(T) {}
+
+  MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+    return new MachObjectWriter(OS, /*Is64Bit=*/false);
+  }
 };
 
 class DarwinX86_64AsmBackend : public DarwinX86AsmBackend {
 public:
   DarwinX86_64AsmBackend(const Target &T)
-    : DarwinX86AsmBackend(T) {}
+    : DarwinX86AsmBackend(T) {
+    HasReliableSymbolDifference = true;
+  }
+
+  MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+    return new MachObjectWriter(OS, /*Is64Bit=*/true);
+  }
 
   virtual bool doesSectionRequireSymbols(const MCSection &Section) const {
     // Temporary labels in the string literals sections require symbols. The
@@ -65,7 +126,7 @@ TargetAsmBackend *llvm::createX86_32AsmBackend(const Target &T,
   case Triple::Darwin:
     return new DarwinX86_32AsmBackend(T);
   default:
-    return new X86AsmBackend(T);
+    return new ELFX86AsmBackend(T);
   }
 }
 
@@ -75,6 +136,6 @@ TargetAsmBackend *llvm::createX86_64AsmBackend(const Target &T,
   case Triple::Darwin:
     return new DarwinX86_64AsmBackend(T);
   default:
-    return new X86AsmBackend(T);
+    return new ELFX86AsmBackend(T);
   }
 }
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index 96b652d..5d3edbb 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -1166,6 +1166,21 @@ bool X86FastISel::X86VisitIntrinsicCall(IntrinsicInst &I) {
   // FIXME: Handle more intrinsics.
   switch (I.getIntrinsicID()) {
   default: return false;
+  case Intrinsic::stackprotector: {
+    // Emit code inline code to store the stack guard onto the stack.
+    EVT PtrTy = TLI.getPointerTy();
+
+    Value *Op1 = I.getOperand(1); // The guard's value.
+    AllocaInst *Slot = cast<AllocaInst>(I.getOperand(2));
+
+    // Grab the frame index.
+    X86AddressMode AM;
+    if (!X86SelectAddress(Slot, AM)) return false;
+    
+    if (!X86FastEmitStore(PtrTy, Op1, AM)) return false;
+    
+    return true;
+  }
   case Intrinsic::objectsize: {
     ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(2));
     const Type *Ty = I.getCalledFunction()->getReturnType();
diff --git a/lib/Target/X86/X86FixupKinds.h b/lib/Target/X86/X86FixupKinds.h
index c8dac3c..a8117d4 100644
--- a/lib/Target/X86/X86FixupKinds.h
+++ b/lib/Target/X86/X86FixupKinds.h
@@ -17,7 +17,8 @@ namespace X86 {
 enum Fixups {
   reloc_pcrel_4byte = FirstTargetFixupKind,  // 32-bit pcrel, e.g. a branch.
   reloc_pcrel_1byte,                         // 8-bit pcrel, e.g. branch_1
-  reloc_riprel_4byte                         // 32-bit rip-relative
+  reloc_riprel_4byte,                        // 32-bit rip-relative
+  reloc_riprel_4byte_movq_load               // 32-bit rip-relative in movq
 };
 }
 }
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index 4058885..1c0ed7e 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -140,6 +140,21 @@ namespace {
 }
 
 namespace {
+  class X86ISelListener : public SelectionDAG::DAGUpdateListener {
+    SmallSet<SDNode*, 4> Deletes;
+  public:
+    explicit X86ISelListener() {}
+    virtual void NodeDeleted(SDNode *N, SDNode *E) {
+      Deletes.insert(N);
+    }
+    virtual void NodeUpdated(SDNode *N) {
+      // Ignore updates.
+    }
+    bool IsDeleted(SDNode *N) {
+      return Deletes.count(N);
+    }
+  };
+
   //===--------------------------------------------------------------------===//
   /// ISel - X86 specific code to select X86 machine instructions for
   /// SelectionDAG operations.
@@ -187,6 +202,7 @@ namespace {
     bool MatchWrapper(SDValue N, X86ISelAddressMode &AM);
     bool MatchAddress(SDValue N, X86ISelAddressMode &AM);
     bool MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
+                                 X86ISelListener &DeadNodes,
                                  unsigned Depth);
     bool MatchAddressBase(SDValue N, X86ISelAddressMode &AM);
     bool SelectAddr(SDNode *Op, SDValue N, SDValue &Base,
@@ -651,7 +667,8 @@ bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) {
 /// returning true if it cannot be done.  This just pattern matches for the
 /// addressing mode.
 bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM) {
-  if (MatchAddressRecursively(N, AM, 0))
+  X86ISelListener DeadNodes;
+  if (MatchAddressRecursively(N, AM, DeadNodes, 0))
     return true;
 
   // Post-processing: Convert lea(,%reg,2) to lea(%reg,%reg), which has
@@ -680,6 +697,7 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM) {
 }
 
 bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
+                                              X86ISelListener &DeadNodes,
                                               unsigned Depth) {
   bool is64Bit = Subtarget->is64Bit();
   DebugLoc dl = N.getDebugLoc();
@@ -845,7 +863,11 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
 
     // Test if the LHS of the sub can be folded.
     X86ISelAddressMode Backup = AM;
-    if (MatchAddressRecursively(N.getNode()->getOperand(0), AM, Depth+1)) {
+    if (MatchAddressRecursively(N.getNode()->getOperand(0), AM,
+                                DeadNodes, Depth+1) ||
+        // If it is successful but the recursive update causes N to be deleted,
+        // then it's not safe to continue.
+        DeadNodes.IsDeleted(N.getNode())) {
       AM = Backup;
       break;
     }
@@ -854,6 +876,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
       AM = Backup;
       break;
     }
+
     int Cost = 0;
     SDValue RHS = N.getNode()->getOperand(1);
     // If the RHS involves a register with multiple uses, this
@@ -907,13 +930,33 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
 
   case ISD::ADD: {
     X86ISelAddressMode Backup = AM;
-    if (!MatchAddressRecursively(N.getNode()->getOperand(0), AM, Depth+1) &&
-        !MatchAddressRecursively(N.getNode()->getOperand(1), AM, Depth+1))
-      return false;
+    if (!MatchAddressRecursively(N.getNode()->getOperand(0), AM,
+                                 DeadNodes, Depth+1)) {
+      if (DeadNodes.IsDeleted(N.getNode()))
+        // If it is successful but the recursive update causes N to be deleted,
+        // then it's not safe to continue.
+        return true;
+      if (!MatchAddressRecursively(N.getNode()->getOperand(1), AM,
+                                   DeadNodes, Depth+1))
+        // If it is successful but the recursive update causes N to be deleted,
+        // then it's not safe to continue.
+        return DeadNodes.IsDeleted(N.getNode());
+    }
+
+    // Try again after commuting the operands.
     AM = Backup;
-    if (!MatchAddressRecursively(N.getNode()->getOperand(1), AM, Depth+1) &&
-        !MatchAddressRecursively(N.getNode()->getOperand(0), AM, Depth+1))
-      return false;
+    if (!MatchAddressRecursively(N.getNode()->getOperand(1), AM,
+                                 DeadNodes, Depth+1)) {
+      if (DeadNodes.IsDeleted(N.getNode()))
+        // If it is successful but the recursive update causes N to be deleted,
+        // then it's not safe to continue.
+        return true;
+      if (!MatchAddressRecursively(N.getNode()->getOperand(0), AM,
+                                   DeadNodes, Depth+1))
+        // If it is successful but the recursive update causes N to be deleted,
+        // then it's not safe to continue.
+        return DeadNodes.IsDeleted(N.getNode());
+    }
     AM = Backup;
 
     // If we couldn't fold both operands into the address at the same time,
@@ -935,16 +978,19 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
     if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
       X86ISelAddressMode Backup = AM;
       uint64_t Offset = CN->getSExtValue();
+
+      // Check to see if the LHS & C is zero.
+      if (!CurDAG->MaskedValueIsZero(N.getOperand(0), CN->getAPIntValue()))
+        break;
+
       // Start with the LHS as an addr mode.
-      if (!MatchAddressRecursively(N.getOperand(0), AM, Depth+1) &&
+      if (!MatchAddressRecursively(N.getOperand(0), AM, DeadNodes, Depth+1) &&
           // Address could not have picked a GV address for the displacement.
           AM.GV == NULL &&
           // On x86-64, the resultant disp must fit in 32-bits.
           (!is64Bit ||
            X86::isOffsetSuitableForCodeModel(AM.Disp + Offset, M,
-                                             AM.hasSymbolicDisplacement())) &&
-          // Check to see if the LHS & C is zero.
-          CurDAG->MaskedValueIsZero(N.getOperand(0), CN->getAPIntValue())) {
+                                             AM.hasSymbolicDisplacement()))) {
         AM.Disp += Offset;
         return false;
       }
@@ -1015,7 +1061,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
           CurDAG->RepositionNode(N.getNode(), Shl.getNode());
           Shl.getNode()->setNodeId(N.getNode()->getNodeId());
         }
-        CurDAG->ReplaceAllUsesWith(N, Shl);
+        CurDAG->ReplaceAllUsesWith(N, Shl, &DeadNodes);
         AM.IndexReg = And;
         AM.Scale = (1 << ScaleLog);
         return false;
@@ -1066,7 +1112,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
       NewSHIFT.getNode()->setNodeId(N.getNode()->getNodeId());
     }
 
-    CurDAG->ReplaceAllUsesWith(N, NewSHIFT);
+    CurDAG->ReplaceAllUsesWith(N, NewSHIFT, &DeadNodes);
     
     AM.Scale = 1 << ShiftCst;
     AM.IndexReg = NewAND;
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 7d2140b..704f9c6 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -2310,6 +2310,28 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
   if (isCalleeStructRet || isCallerStructRet)
     return false;
 
+  // If the call result is in ST0 / ST1, it needs to be popped off the x87 stack.
+  // Therefore if it's not used by the call it is not safe to optimize this into
+  // a sibcall.
+  bool Unused = false;
+  for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
+    if (!Ins[i].Used) {
+      Unused = true;
+      break;
+    }
+  }
+  if (Unused) {
+    SmallVector<CCValAssign, 16> RVLocs;
+    CCState CCInfo(CalleeCC, false, getTargetMachine(),
+                   RVLocs, *DAG.getContext());
+    CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
+    for (unsigned i = 0; i != RVLocs.size(); ++i) {
+      CCValAssign &VA = RVLocs[i];
+      if (VA.getLocReg() == X86::ST0 || VA.getLocReg() == X86::ST1)
+        return false;
+    }
+  }
+
   // If the callee takes no arguments then go on to check the results of the
   // call.
   if (!Outs.empty()) {
diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td
index 4262c0ac..8cbb756 100644
--- a/lib/Target/X86/X86Instr64bit.td
+++ b/lib/Target/X86/X86Instr64bit.td
@@ -144,7 +144,7 @@ let isCall = 1 in
     // NOTE: this pattern doesn't match "X86call imm", because we do not know
     // that the offset between an arbitrary immediate and the call will fit in
     // the 32-bit pcrel field that we have.
-    def CALL64pcrel32 : Ii32<0xE8, RawFrm,
+    def CALL64pcrel32 : Ii32PCRel<0xE8, RawFrm,
                           (outs), (ins i64i32imm_pcrel:$dst, variable_ops),
                           "call{q}\t$dst", []>,
                         Requires<[In64BitMode, NotWin64]>;
@@ -511,6 +511,14 @@ def ADD64rr    : RI<0x01, MRMDestReg, (outs GR64:$dst),
                     [(set GR64:$dst, (add GR64:$src1, GR64:$src2)),
                      (implicit EFLAGS)]>;
 
+// These are alternate spellings for use by the disassembler, we mark them as
+// code gen only to ensure they aren't matched by the assembler.
+let isCodeGenOnly = 1 in {
+  def ADD64rr_alt  : RI<0x03, MRMSrcReg, (outs GR64:$dst), 
+                       (ins GR64:$src1, GR64:$src2),
+                       "add{l}\t{$src2, $dst|$dst, $src2}", []>;
+}
+
 // Register-Integer Addition
 def ADD64ri8  : RIi8<0x83, MRM0r, (outs GR64:$dst), 
                      (ins GR64:$src1, i64i8imm:$src2),
@@ -531,12 +539,6 @@ def ADD64rm     : RI<0x03, MRMSrcMem, (outs GR64:$dst),
                      [(set GR64:$dst, (add GR64:$src1, (load addr:$src2))),
                       (implicit EFLAGS)]>;
 
-// Register-Register Addition - Equivalent to the normal rr form (ADD64rr), but
-//   differently encoded.
-def ADD64mrmrr  : RI<0x03, MRMSrcReg, (outs GR64:$dst), 
-                     (ins GR64:$src1, GR64:$src2),
-                     "add{l}\t{$src2, $dst|$dst, $src2}", []>;
-
 } // isTwoAddress
 
 // Memory-Register Addition
@@ -1225,59 +1227,59 @@ let Defs = [EFLAGS] in {
 def TEST64i32 : RIi32<0xa9, RawFrm, (outs), (ins i32imm:$src),
                       "test{q}\t{$src, %rax|%rax, $src}", []>;
 let isCommutable = 1 in
-def TEST64rr : RI<0x85, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
+def TEST64rr : RI<0x85, MRMSrcReg, (outs), (ins GR64:$src1, GR64:$src2),
                   "test{q}\t{$src2, $src1|$src1, $src2}",
-                  [(X86cmp (and GR64:$src1, GR64:$src2), 0),
-                   (implicit EFLAGS)]>;
+                  [(set EFLAGS, (X86cmp (and GR64:$src1, GR64:$src2), 0))]>;
 def TEST64rm : RI<0x85, MRMSrcMem, (outs), (ins GR64:$src1, i64mem:$src2),
                   "test{q}\t{$src2, $src1|$src1, $src2}",
-                  [(X86cmp (and GR64:$src1, (loadi64 addr:$src2)), 0),
-                   (implicit EFLAGS)]>;
+                  [(set EFLAGS, (X86cmp (and GR64:$src1, (loadi64 addr:$src2)),
+                    0))]>;
 def TEST64ri32 : RIi32<0xF7, MRM0r, (outs),
                                         (ins GR64:$src1, i64i32imm:$src2),
                        "test{q}\t{$src2, $src1|$src1, $src2}",
-                     [(X86cmp (and GR64:$src1, i64immSExt32:$src2), 0),
-                      (implicit EFLAGS)]>;
+                     [(set EFLAGS, (X86cmp (and GR64:$src1, i64immSExt32:$src2),
+                      0))]>;
 def TEST64mi32 : RIi32<0xF7, MRM0m, (outs),
                                         (ins i64mem:$src1, i64i32imm:$src2),
                        "test{q}\t{$src2, $src1|$src1, $src2}",
-                [(X86cmp (and (loadi64 addr:$src1), i64immSExt32:$src2), 0),
-                 (implicit EFLAGS)]>;
+                [(set EFLAGS, (X86cmp (and (loadi64 addr:$src1),
+                                           i64immSExt32:$src2), 0))]>;
 
 
 def CMP64i32 : RIi32<0x3D, RawFrm, (outs), (ins i32imm:$src),
                      "cmp{q}\t{$src, %rax|%rax, $src}", []>;
 def CMP64rr : RI<0x39, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
                  "cmp{q}\t{$src2, $src1|$src1, $src2}",
-                 [(X86cmp GR64:$src1, GR64:$src2),
-                  (implicit EFLAGS)]>;
-def CMP64mrmrr : RI<0x3B, MRMSrcReg, (outs), (ins GR64:$src1, GR64:$src2),
-                    "cmp{q}\t{$src2, $src1|$src1, $src2}", []>;
+                 [(set EFLAGS, (X86cmp GR64:$src1, GR64:$src2))]>;
+
+// These are alternate spellings for use by the disassembler, we mark them as
+// code gen only to ensure they aren't matched by the assembler.
+let isCodeGenOnly = 1 in {
+  def CMP64mrmrr : RI<0x3B, MRMSrcReg, (outs), (ins GR64:$src1, GR64:$src2),
+                      "cmp{q}\t{$src2, $src1|$src1, $src2}", []>;
+}
+
 def CMP64mr : RI<0x39, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
                  "cmp{q}\t{$src2, $src1|$src1, $src2}",
-                 [(X86cmp (loadi64 addr:$src1), GR64:$src2),
-                   (implicit EFLAGS)]>;
+                 [(set EFLAGS, (X86cmp (loadi64 addr:$src1), GR64:$src2))]>;
 def CMP64rm : RI<0x3B, MRMSrcMem, (outs), (ins GR64:$src1, i64mem:$src2),
                  "cmp{q}\t{$src2, $src1|$src1, $src2}",
-                 [(X86cmp GR64:$src1, (loadi64 addr:$src2)),
-                  (implicit EFLAGS)]>;
+                 [(set EFLAGS, (X86cmp GR64:$src1, (loadi64 addr:$src2)))]>;
 def CMP64ri8 : RIi8<0x83, MRM7r, (outs), (ins GR64:$src1, i64i8imm:$src2),
                     "cmp{q}\t{$src2, $src1|$src1, $src2}",
-                    [(X86cmp GR64:$src1, i64immSExt8:$src2),
-                     (implicit EFLAGS)]>;
+                    [(set EFLAGS, (X86cmp GR64:$src1, i64immSExt8:$src2))]>;
 def CMP64ri32 : RIi32<0x81, MRM7r, (outs), (ins GR64:$src1, i64i32imm:$src2),
                       "cmp{q}\t{$src2, $src1|$src1, $src2}",
-                      [(X86cmp GR64:$src1, i64immSExt32:$src2),
-                       (implicit EFLAGS)]>;
+                      [(set EFLAGS, (X86cmp GR64:$src1, i64immSExt32:$src2))]>;
 def CMP64mi8 : RIi8<0x83, MRM7m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
                     "cmp{q}\t{$src2, $src1|$src1, $src2}",
-                    [(X86cmp (loadi64 addr:$src1), i64immSExt8:$src2),
-                     (implicit EFLAGS)]>;
+                    [(set EFLAGS, (X86cmp (loadi64 addr:$src1),
+                                          i64immSExt8:$src2))]>;
 def CMP64mi32 : RIi32<0x81, MRM7m, (outs),
                                        (ins i64mem:$src1, i64i32imm:$src2),
                       "cmp{q}\t{$src2, $src1|$src1, $src2}",
-                      [(X86cmp (loadi64 addr:$src1), i64immSExt32:$src2),
-                       (implicit EFLAGS)]>;
+                      [(set EFLAGS, (X86cmp (loadi64 addr:$src1),
+                                            i64immSExt32:$src2))]>;
 } // Defs = [EFLAGS]
 
 // Bit tests.
@@ -1285,8 +1287,7 @@ def CMP64mi32 : RIi32<0x81, MRM7m, (outs),
 let Defs = [EFLAGS] in {
 def BT64rr : RI<0xA3, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
                "bt{q}\t{$src2, $src1|$src1, $src2}",
-               [(X86bt GR64:$src1, GR64:$src2),
-                (implicit EFLAGS)]>, TB;
+               [(set EFLAGS, (X86bt GR64:$src1, GR64:$src2))]>, TB;
 
 // Unlike with the register+register form, the memory+register form of the
 // bt instruction does not ignore the high bits of the index. From ISel's
@@ -1300,15 +1301,14 @@ def BT64mr : RI<0xA3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
 
 def BT64ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR64:$src1, i64i8imm:$src2),
                 "bt{q}\t{$src2, $src1|$src1, $src2}",
-                [(X86bt GR64:$src1, i64immSExt8:$src2),
-                 (implicit EFLAGS)]>, TB;
+                [(set EFLAGS, (X86bt GR64:$src1, i64immSExt8:$src2))]>, TB;
 // Note that these instructions don't need FastBTMem because that
 // only applies when the other operand is in a register. When it's
 // an immediate, bt is still fast.
 def BT64mi8 : Ii8<0xBA, MRM4m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
                 "bt{q}\t{$src2, $src1|$src1, $src2}",
-                [(X86bt (loadi64 addr:$src1), i64immSExt8:$src2),
-                 (implicit EFLAGS)]>, TB;
+                [(set EFLAGS, (X86bt (loadi64 addr:$src1),
+                                     i64immSExt8:$src2))]>, TB;
 
 def BTC64rr : RI<0xBB, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
                  "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
@@ -1938,7 +1938,7 @@ def : Pat<(X86tcret (i64 texternalsym:$dst), imm:$off),
 // Comparisons.
 
 // TEST R,R is smaller than CMP R,0
-def : Pat<(parallel (X86cmp GR64:$src1, 0), (implicit EFLAGS)),
+def : Pat<(X86cmp GR64:$src1, 0),
           (TEST64rr GR64:$src1, GR64:$src1)>;
 
 // Conditional moves with folded loads with operands swapped and conditions
@@ -2233,21 +2233,6 @@ def : Pat<(parallel (X86add_flag GR64:$src1, (loadi64 addr:$src2)),
                     (implicit EFLAGS)),
           (ADD64rm GR64:$src1, addr:$src2)>;
 
-// Memory-Register Addition with EFLAGS result
-def : Pat<(parallel (store (X86add_flag (loadi64 addr:$dst), GR64:$src2),
-                           addr:$dst),
-                    (implicit EFLAGS)),
-          (ADD64mr addr:$dst, GR64:$src2)>;
-def : Pat<(parallel (store (X86add_flag (loadi64 addr:$dst), i64immSExt8:$src2),
-                           addr:$dst),
-                    (implicit EFLAGS)),
-          (ADD64mi8 addr:$dst, i64immSExt8:$src2)>;
-def : Pat<(parallel (store (X86add_flag (loadi64 addr:$dst), 
-                                        i64immSExt32:$src2),
-                           addr:$dst),
-                    (implicit EFLAGS)),
-          (ADD64mi32 addr:$dst, i64immSExt32:$src2)>;
-
 // Register-Register Subtraction with EFLAGS result
 def : Pat<(parallel (X86sub_flag GR64:$src1, GR64:$src2),
                     (implicit EFLAGS)),
@@ -2266,24 +2251,6 @@ def : Pat<(parallel (X86sub_flag GR64:$src1, i64immSExt32:$src2),
                     (implicit EFLAGS)),
           (SUB64ri32 GR64:$src1, i64immSExt32:$src2)>;
 
-// Memory-Register Subtraction with EFLAGS result
-def : Pat<(parallel (store (X86sub_flag (loadi64 addr:$dst), GR64:$src2),
-                           addr:$dst),
-                    (implicit EFLAGS)),
-          (SUB64mr addr:$dst, GR64:$src2)>;
-
-// Memory-Integer Subtraction with EFLAGS result
-def : Pat<(parallel (store (X86sub_flag (loadi64 addr:$dst), 
-                                        i64immSExt8:$src2),
-                           addr:$dst),
-                    (implicit EFLAGS)),
-          (SUB64mi8 addr:$dst, i64immSExt8:$src2)>;
-def : Pat<(parallel (store (X86sub_flag (loadi64 addr:$dst),
-                                        i64immSExt32:$src2),
-                           addr:$dst),
-                    (implicit EFLAGS)),
-          (SUB64mi32 addr:$dst, i64immSExt32:$src2)>;
-
 // Register-Register Signed Integer Multiplication with EFLAGS result
 def : Pat<(parallel (X86smul_flag GR64:$src1, GR64:$src2),
                     (implicit EFLAGS)),
@@ -2313,36 +2280,18 @@ def : Pat<(parallel (X86smul_flag (loadi64 addr:$src1), i64immSExt32:$src2),
 // INC and DEC with EFLAGS result. Note that these do not set CF.
 def : Pat<(parallel (X86inc_flag GR16:$src), (implicit EFLAGS)),
           (INC64_16r GR16:$src)>, Requires<[In64BitMode]>;
-def : Pat<(parallel (store (i16 (X86inc_flag (loadi16 addr:$dst))), addr:$dst),
-                    (implicit EFLAGS)),
-          (INC64_16m addr:$dst)>, Requires<[In64BitMode]>;
 def : Pat<(parallel (X86dec_flag GR16:$src), (implicit EFLAGS)),
           (DEC64_16r GR16:$src)>, Requires<[In64BitMode]>;
-def : Pat<(parallel (store (i16 (X86dec_flag (loadi16 addr:$dst))), addr:$dst),
-                    (implicit EFLAGS)),
-          (DEC64_16m addr:$dst)>, Requires<[In64BitMode]>;
 
 def : Pat<(parallel (X86inc_flag GR32:$src), (implicit EFLAGS)),
           (INC64_32r GR32:$src)>, Requires<[In64BitMode]>;
-def : Pat<(parallel (store (i32 (X86inc_flag (loadi32 addr:$dst))), addr:$dst),
-                    (implicit EFLAGS)),
-          (INC64_32m addr:$dst)>, Requires<[In64BitMode]>;
 def : Pat<(parallel (X86dec_flag GR32:$src), (implicit EFLAGS)),
           (DEC64_32r GR32:$src)>, Requires<[In64BitMode]>;
-def : Pat<(parallel (store (i32 (X86dec_flag (loadi32 addr:$dst))), addr:$dst),
-                    (implicit EFLAGS)),
-          (DEC64_32m addr:$dst)>, Requires<[In64BitMode]>;
 
 def : Pat<(parallel (X86inc_flag GR64:$src), (implicit EFLAGS)),
           (INC64r GR64:$src)>;
-def : Pat<(parallel (store (i64 (X86inc_flag (loadi64 addr:$dst))), addr:$dst),
-                    (implicit EFLAGS)),
-          (INC64m addr:$dst)>;
 def : Pat<(parallel (X86dec_flag GR64:$src), (implicit EFLAGS)),
           (DEC64r GR64:$src)>;
-def : Pat<(parallel (store (i64 (X86dec_flag (loadi64 addr:$dst))), addr:$dst),
-                    (implicit EFLAGS)),
-          (DEC64m addr:$dst)>;
 
 // Register-Register Logical Or with EFLAGS result
 def : Pat<(parallel (X86or_flag GR64:$src1, GR64:$src2),
@@ -2362,20 +2311,6 @@ def : Pat<(parallel (X86or_flag GR64:$src1, (loadi64 addr:$src2)),
                     (implicit EFLAGS)),
           (OR64rm GR64:$src1, addr:$src2)>;
 
-// Memory-Register Logical Or with EFLAGS result
-def : Pat<(parallel (store (X86or_flag (loadi64 addr:$dst), GR64:$src2),
-                           addr:$dst),
-                    (implicit EFLAGS)),
-          (OR64mr addr:$dst, GR64:$src2)>;
-def : Pat<(parallel (store (X86or_flag (loadi64 addr:$dst), i64immSExt8:$src2),
-                           addr:$dst),
-                    (implicit EFLAGS)),
-          (OR64mi8 addr:$dst, i64immSExt8:$src2)>;
-def : Pat<(parallel (store (X86or_flag (loadi64 addr:$dst), i64immSExt32:$src2),
-                           addr:$dst),
-                    (implicit EFLAGS)),
-          (OR64mi32 addr:$dst, i64immSExt32:$src2)>;
-
 // Register-Register Logical XOr with EFLAGS result
 def : Pat<(parallel (X86xor_flag GR64:$src1, GR64:$src2),
                     (implicit EFLAGS)),
@@ -2394,21 +2329,6 @@ def : Pat<(parallel (X86xor_flag GR64:$src1, (loadi64 addr:$src2)),
                     (implicit EFLAGS)),
           (XOR64rm GR64:$src1, addr:$src2)>;
 
-// Memory-Register Logical XOr with EFLAGS result
-def : Pat<(parallel (store (X86xor_flag (loadi64 addr:$dst), GR64:$src2),
-                           addr:$dst),
-                    (implicit EFLAGS)),
-          (XOR64mr addr:$dst, GR64:$src2)>;
-def : Pat<(parallel (store (X86xor_flag (loadi64 addr:$dst), i64immSExt8:$src2),
-                           addr:$dst),
-                    (implicit EFLAGS)),
-          (XOR64mi8 addr:$dst, i64immSExt8:$src2)>;
-def : Pat<(parallel (store (X86xor_flag (loadi64 addr:$dst), 
-                                        i64immSExt32:$src2),
-                           addr:$dst),
-                    (implicit EFLAGS)),
-          (XOR64mi32 addr:$dst, i64immSExt32:$src2)>;
-
 // Register-Register Logical And with EFLAGS result
 def : Pat<(parallel (X86and_flag GR64:$src1, GR64:$src2),
                     (implicit EFLAGS)),
@@ -2427,21 +2347,6 @@ def : Pat<(parallel (X86and_flag GR64:$src1, (loadi64 addr:$src2)),
                     (implicit EFLAGS)),
           (AND64rm GR64:$src1, addr:$src2)>;
 
-// Memory-Register Logical And with EFLAGS result
-def : Pat<(parallel (store (X86and_flag (loadi64 addr:$dst), GR64:$src2),
-                           addr:$dst),
-                    (implicit EFLAGS)),
-          (AND64mr addr:$dst, GR64:$src2)>;
-def : Pat<(parallel (store (X86and_flag (loadi64 addr:$dst), i64immSExt8:$src2),
-                           addr:$dst),
-                    (implicit EFLAGS)),
-          (AND64mi8 addr:$dst, i64immSExt8:$src2)>;
-def : Pat<(parallel (store (X86and_flag (loadi64 addr:$dst), 
-                                        i64immSExt32:$src2),
-                           addr:$dst),
-                    (implicit EFLAGS)),
-          (AND64mi32 addr:$dst, i64immSExt32:$src2)>;
-
 //===----------------------------------------------------------------------===//
 // X86-64 SSE Instructions
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86InstrFPStack.td b/lib/Target/X86/X86InstrFPStack.td
index b730918..e6d1fee 100644
--- a/lib/Target/X86/X86InstrFPStack.td
+++ b/lib/Target/X86/X86InstrFPStack.td
@@ -562,15 +562,13 @@ def UCOM_Fpr64 : FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP,
 def UCOM_Fpr80 : FpI_  <(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP,
                         []>;  // FPSW = cmp ST(0) with ST(i)
                         
+// CC = ST(0) cmp ST(i)
 def UCOM_FpIr32: FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP,
-                  [(X86cmp RFP32:$lhs, RFP32:$rhs),
-                   (implicit EFLAGS)]>; // CC = ST(0) cmp ST(i)
+                  [(set EFLAGS, (X86cmp RFP32:$lhs, RFP32:$rhs))]>;
 def UCOM_FpIr64: FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP,
-                  [(X86cmp RFP64:$lhs, RFP64:$rhs),
-                   (implicit EFLAGS)]>; // CC = ST(0) cmp ST(i)
+                  [(set EFLAGS, (X86cmp RFP64:$lhs, RFP64:$rhs))]>;
 def UCOM_FpIr80: FpI_<(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP,
-                  [(X86cmp RFP80:$lhs, RFP80:$rhs),
-                   (implicit EFLAGS)]>; // CC = ST(0) cmp ST(i)
+                  [(set EFLAGS, (X86cmp RFP80:$lhs, RFP80:$rhs))]>;
 }
 
 let Defs = [EFLAGS], Uses = [ST0] in {
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 1225b68..c80a18d 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -21,8 +21,7 @@ def SDTIntShiftDOp: SDTypeProfile<1, 3,
                                   [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
                                    SDTCisInt<0>, SDTCisInt<3>]>;
 
-// FIXME: Should be modelled as returning i32
-def SDTX86CmpTest : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>;
+def SDTX86CmpTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisSameAs<1, 2>]>;
 
 def SDTX86Cmov    : SDTypeProfile<1, 4,
                                   [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>,
@@ -120,12 +119,12 @@ def X86AtomSwap64 : SDNode<"X86ISD::ATOMSWAP64_DAG", SDTX86atomicBinary,
                         [SDNPHasChain, SDNPMayStore, 
                          SDNPMayLoad, SDNPMemOperand]>;
 def X86retflag : SDNode<"X86ISD::RET_FLAG", SDTX86Ret,
-                        [SDNPHasChain, SDNPOptInFlag]>;
+                        [SDNPHasChain, SDNPOptInFlag, SDNPVariadic]>;
 
 def X86vastart_save_xmm_regs :
                  SDNode<"X86ISD::VASTART_SAVE_XMM_REGS",
                         SDT_X86VASTART_SAVE_XMM_REGS,
-                        [SDNPHasChain]>;
+                        [SDNPHasChain, SDNPVariadic]>;
 
 def X86callseq_start :
                  SDNode<"ISD::CALLSEQ_START", SDT_X86CallSeqStart,
@@ -135,7 +134,8 @@ def X86callseq_end :
                         [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;       
 
 def X86call    : SDNode<"X86ISD::CALL",     SDT_X86Call,
-                        [SDNPHasChain, SDNPOutFlag, SDNPOptInFlag]>;
+                        [SDNPHasChain, SDNPOutFlag, SDNPOptInFlag,
+                         SDNPVariadic]>;
 
 def X86rep_stos: SDNode<"X86ISD::REP_STOS", SDTX86RepStr,
                         [SDNPHasChain, SDNPInFlag, SDNPOutFlag, SDNPMayStore]>;
@@ -158,7 +158,7 @@ def X86ehret : SDNode<"X86ISD::EH_RETURN", SDT_X86EHRET,
                         [SDNPHasChain]>;
 
 def X86tcret : SDNode<"X86ISD::TC_RETURN", SDT_X86TCRET, 
-                        [SDNPHasChain,  SDNPOptInFlag]>;
+                        [SDNPHasChain,  SDNPOptInFlag, SDNPVariadic]>;
 
 def X86add_flag  : SDNode<"X86ISD::ADD",  SDTBinaryArithWithFlags,
                           [SDNPCommutative]>;
@@ -661,9 +661,9 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
 
 // Loop instructions
 
-def LOOP   : I<0xE2, RawFrm, (ins brtarget8:$dst), (outs), "loop\t$dst", []>;
-def LOOPE  : I<0xE1, RawFrm, (ins brtarget8:$dst), (outs), "loope\t$dst", []>;
-def LOOPNE : I<0xE0, RawFrm, (ins brtarget8:$dst), (outs), "loopne\t$dst", []>;
+def LOOP   : I<0xE2, RawFrm, (outs), (ins brtarget8:$dst), "loop\t$dst", []>;
+def LOOPE  : I<0xE1, RawFrm, (outs), (ins brtarget8:$dst), "loope\t$dst", []>;
+def LOOPNE : I<0xE0, RawFrm, (outs), (ins brtarget8:$dst), "loopne\t$dst", []>;
 
 //===----------------------------------------------------------------------===//
 //  Call Instructions...
@@ -3200,17 +3200,16 @@ let Defs = [EFLAGS] in {
 let isCommutable = 1 in {   // TEST X, Y   --> TEST Y, X
 def TEST8rr  : I<0x84, MRMSrcReg, (outs),  (ins GR8:$src1, GR8:$src2),
                      "test{b}\t{$src2, $src1|$src1, $src2}",
-                     [(X86cmp (and_su GR8:$src1, GR8:$src2), 0),
-                      (implicit EFLAGS)]>;
+                     [(set EFLAGS, (X86cmp (and_su GR8:$src1, GR8:$src2), 0))]>;
 def TEST16rr : I<0x85, MRMSrcReg, (outs),  (ins GR16:$src1, GR16:$src2),
                      "test{w}\t{$src2, $src1|$src1, $src2}",
-                     [(X86cmp (and_su GR16:$src1, GR16:$src2), 0),
-                      (implicit EFLAGS)]>,
+                     [(set EFLAGS, (X86cmp (and_su GR16:$src1, GR16:$src2),
+                      0))]>,
                  OpSize;
 def TEST32rr : I<0x85, MRMSrcReg, (outs),  (ins GR32:$src1, GR32:$src2),
                      "test{l}\t{$src2, $src1|$src1, $src2}",
-                     [(X86cmp (and_su GR32:$src1, GR32:$src2), 0),
-                      (implicit EFLAGS)]>;
+                     [(set EFLAGS, (X86cmp (and_su GR32:$src1, GR32:$src2),
+                      0))]>;
 }
 
 def TEST8i8  : Ii8<0xA8, RawFrm, (outs), (ins i8imm:$src),
@@ -3222,48 +3221,46 @@ def TEST32i32 : Ii32<0xA9, RawFrm, (outs), (ins i32imm:$src),
 
 def TEST8rm  : I<0x84, MRMSrcMem, (outs),  (ins GR8 :$src1, i8mem :$src2),
                      "test{b}\t{$src2, $src1|$src1, $src2}",
-                     [(X86cmp (and GR8:$src1, (loadi8 addr:$src2)), 0),
-                      (implicit EFLAGS)]>;
+                     [(set EFLAGS, (X86cmp (and GR8:$src1, (loadi8 addr:$src2)),
+                       0))]>;
 def TEST16rm : I<0x85, MRMSrcMem, (outs),  (ins GR16:$src1, i16mem:$src2),
                      "test{w}\t{$src2, $src1|$src1, $src2}",
-                     [(X86cmp (and GR16:$src1, (loadi16 addr:$src2)), 0),
-                      (implicit EFLAGS)]>, OpSize;
+                     [(set EFLAGS, (X86cmp (and GR16:$src1,
+                                         (loadi16 addr:$src2)), 0))]>, OpSize;
 def TEST32rm : I<0x85, MRMSrcMem, (outs),  (ins GR32:$src1, i32mem:$src2),
                      "test{l}\t{$src2, $src1|$src1, $src2}",
-                     [(X86cmp (and GR32:$src1, (loadi32 addr:$src2)), 0),
-                      (implicit EFLAGS)]>;
+                     [(set EFLAGS, (X86cmp (and GR32:$src1,
+                                                (loadi32 addr:$src2)), 0))]>;
 
 def TEST8ri  : Ii8 <0xF6, MRM0r,                     // flags = GR8  & imm8
                     (outs),  (ins GR8:$src1, i8imm:$src2),
                     "test{b}\t{$src2, $src1|$src1, $src2}",
-                    [(X86cmp (and_su GR8:$src1, imm:$src2), 0),
-                     (implicit EFLAGS)]>;
+                    [(set EFLAGS, (X86cmp (and_su GR8:$src1, imm:$src2), 0))]>;
 def TEST16ri : Ii16<0xF7, MRM0r,                     // flags = GR16 & imm16
                     (outs),  (ins GR16:$src1, i16imm:$src2),
                     "test{w}\t{$src2, $src1|$src1, $src2}",
-                    [(X86cmp (and_su GR16:$src1, imm:$src2), 0),
-                     (implicit EFLAGS)]>, OpSize;
+                    [(set EFLAGS, (X86cmp (and_su GR16:$src1, imm:$src2), 0))]>,
+                    OpSize;
 def TEST32ri : Ii32<0xF7, MRM0r,                     // flags = GR32 & imm32
                     (outs),  (ins GR32:$src1, i32imm:$src2),
                     "test{l}\t{$src2, $src1|$src1, $src2}",
-                    [(X86cmp (and_su GR32:$src1, imm:$src2), 0),
-                     (implicit EFLAGS)]>;
+                    [(set EFLAGS, (X86cmp (and_su GR32:$src1, imm:$src2), 0))]>;
 
 def TEST8mi  : Ii8 <0xF6, MRM0m,                   // flags = [mem8]  & imm8
                     (outs), (ins i8mem:$src1, i8imm:$src2),
                     "test{b}\t{$src2, $src1|$src1, $src2}",
-                    [(X86cmp (and (loadi8 addr:$src1), imm:$src2), 0),
-                     (implicit EFLAGS)]>;
+                    [(set EFLAGS, (X86cmp (and (loadi8 addr:$src1), imm:$src2),
+                     0))]>;
 def TEST16mi : Ii16<0xF7, MRM0m,                   // flags = [mem16] & imm16
                     (outs), (ins i16mem:$src1, i16imm:$src2),
                     "test{w}\t{$src2, $src1|$src1, $src2}",
-                    [(X86cmp (and (loadi16 addr:$src1), imm:$src2), 0),
-                     (implicit EFLAGS)]>, OpSize;
+                    [(set EFLAGS, (X86cmp (and (loadi16 addr:$src1), imm:$src2),
+                     0))]>, OpSize;
 def TEST32mi : Ii32<0xF7, MRM0m,                   // flags = [mem32] & imm32
                     (outs), (ins i32mem:$src1, i32imm:$src2),
                     "test{l}\t{$src2, $src1|$src1, $src2}",
-                    [(X86cmp (and (loadi32 addr:$src1), imm:$src2), 0),
-                     (implicit EFLAGS)]>;
+                    [(set EFLAGS, (X86cmp (and (loadi32 addr:$src1), imm:$src2),
+                     0))]>;
 } // Defs = [EFLAGS]
 
 
@@ -3477,45 +3474,41 @@ def CMP32i32 : Ii32<0x3D, RawFrm, (outs), (ins i32imm:$src),
 def CMP8rr  : I<0x38, MRMDestReg,
                 (outs), (ins GR8 :$src1, GR8 :$src2),
                 "cmp{b}\t{$src2, $src1|$src1, $src2}",
-                [(X86cmp GR8:$src1, GR8:$src2), (implicit EFLAGS)]>;
+                [(set EFLAGS, (X86cmp GR8:$src1, GR8:$src2))]>;
 def CMP16rr : I<0x39, MRMDestReg,
                 (outs), (ins GR16:$src1, GR16:$src2),
                 "cmp{w}\t{$src2, $src1|$src1, $src2}",
-                [(X86cmp GR16:$src1, GR16:$src2), (implicit EFLAGS)]>, OpSize;
+                [(set EFLAGS, (X86cmp GR16:$src1, GR16:$src2))]>, OpSize;
 def CMP32rr : I<0x39, MRMDestReg,
                 (outs), (ins GR32:$src1, GR32:$src2),
                 "cmp{l}\t{$src2, $src1|$src1, $src2}",
-                [(X86cmp GR32:$src1, GR32:$src2), (implicit EFLAGS)]>;
+                [(set EFLAGS, (X86cmp GR32:$src1, GR32:$src2))]>;
 def CMP8mr  : I<0x38, MRMDestMem,
                 (outs), (ins i8mem :$src1, GR8 :$src2),
                 "cmp{b}\t{$src2, $src1|$src1, $src2}",
-                [(X86cmp (loadi8 addr:$src1), GR8:$src2),
-                 (implicit EFLAGS)]>;
+                [(set EFLAGS, (X86cmp (loadi8 addr:$src1), GR8:$src2))]>;
 def CMP16mr : I<0x39, MRMDestMem,
                 (outs), (ins i16mem:$src1, GR16:$src2),
                 "cmp{w}\t{$src2, $src1|$src1, $src2}",
-                [(X86cmp (loadi16 addr:$src1), GR16:$src2),
-                 (implicit EFLAGS)]>, OpSize;
+                [(set EFLAGS, (X86cmp (loadi16 addr:$src1), GR16:$src2))]>,
+                 OpSize;
 def CMP32mr : I<0x39, MRMDestMem,
                 (outs), (ins i32mem:$src1, GR32:$src2),
                 "cmp{l}\t{$src2, $src1|$src1, $src2}",
-                [(X86cmp (loadi32 addr:$src1), GR32:$src2),
-                 (implicit EFLAGS)]>;
+                [(set EFLAGS, (X86cmp (loadi32 addr:$src1), GR32:$src2))]>;
 def CMP8rm  : I<0x3A, MRMSrcMem,
                 (outs), (ins GR8 :$src1, i8mem :$src2),
                 "cmp{b}\t{$src2, $src1|$src1, $src2}",
-                [(X86cmp GR8:$src1, (loadi8 addr:$src2)),
-                 (implicit EFLAGS)]>;
+                [(set EFLAGS, (X86cmp GR8:$src1, (loadi8 addr:$src2)))]>;
 def CMP16rm : I<0x3B, MRMSrcMem,
                 (outs), (ins GR16:$src1, i16mem:$src2),
                 "cmp{w}\t{$src2, $src1|$src1, $src2}",
-                [(X86cmp GR16:$src1, (loadi16 addr:$src2)),
-                 (implicit EFLAGS)]>, OpSize;
+                [(set EFLAGS, (X86cmp GR16:$src1, (loadi16 addr:$src2)))]>,
+                 OpSize;
 def CMP32rm : I<0x3B, MRMSrcMem,
                 (outs), (ins GR32:$src1, i32mem:$src2),
                 "cmp{l}\t{$src2, $src1|$src1, $src2}",
-                [(X86cmp GR32:$src1, (loadi32 addr:$src2)),
-                 (implicit EFLAGS)]>;
+                [(set EFLAGS, (X86cmp GR32:$src1, (loadi32 addr:$src2)))]>;
 
 // These are alternate spellings for use by the disassembler, we mark them as
 // code gen only to ensure they aren't matched by the assembler.
@@ -3531,51 +3524,47 @@ let isCodeGenOnly = 1 in {
 def CMP8ri  : Ii8<0x80, MRM7r,
                   (outs), (ins GR8:$src1, i8imm:$src2),
                   "cmp{b}\t{$src2, $src1|$src1, $src2}",
-                  [(X86cmp GR8:$src1, imm:$src2), (implicit EFLAGS)]>;
+                  [(set EFLAGS, (X86cmp GR8:$src1, imm:$src2))]>;
 def CMP16ri : Ii16<0x81, MRM7r,
                    (outs), (ins GR16:$src1, i16imm:$src2),
                    "cmp{w}\t{$src2, $src1|$src1, $src2}",
-                   [(X86cmp GR16:$src1, imm:$src2),
-                    (implicit EFLAGS)]>, OpSize;
+                   [(set EFLAGS, (X86cmp GR16:$src1, imm:$src2))]>, OpSize;
 def CMP32ri : Ii32<0x81, MRM7r,
                    (outs), (ins GR32:$src1, i32imm:$src2),
                    "cmp{l}\t{$src2, $src1|$src1, $src2}",
-                   [(X86cmp GR32:$src1, imm:$src2), (implicit EFLAGS)]>;
+                   [(set EFLAGS, (X86cmp GR32:$src1, imm:$src2))]>;
 def CMP8mi  : Ii8 <0x80, MRM7m,
                    (outs), (ins i8mem :$src1, i8imm :$src2),
                    "cmp{b}\t{$src2, $src1|$src1, $src2}",
-                   [(X86cmp (loadi8 addr:$src1), imm:$src2),
-                    (implicit EFLAGS)]>;
+                   [(set EFLAGS, (X86cmp (loadi8 addr:$src1), imm:$src2))]>;
 def CMP16mi : Ii16<0x81, MRM7m,
                    (outs), (ins i16mem:$src1, i16imm:$src2),
                    "cmp{w}\t{$src2, $src1|$src1, $src2}",
-                   [(X86cmp (loadi16 addr:$src1), imm:$src2),
-                    (implicit EFLAGS)]>, OpSize;
+                   [(set EFLAGS, (X86cmp (loadi16 addr:$src1), imm:$src2))]>,
+                   OpSize;
 def CMP32mi : Ii32<0x81, MRM7m,
                    (outs), (ins i32mem:$src1, i32imm:$src2),
                    "cmp{l}\t{$src2, $src1|$src1, $src2}",
-                   [(X86cmp (loadi32 addr:$src1), imm:$src2),
-                    (implicit EFLAGS)]>;
+                   [(set EFLAGS, (X86cmp (loadi32 addr:$src1), imm:$src2))]>;
 def CMP16ri8 : Ii8<0x83, MRM7r,
                    (outs), (ins GR16:$src1, i16i8imm:$src2),
                    "cmp{w}\t{$src2, $src1|$src1, $src2}",
-                   [(X86cmp GR16:$src1, i16immSExt8:$src2),
-                    (implicit EFLAGS)]>, OpSize;
+                   [(set EFLAGS, (X86cmp GR16:$src1, i16immSExt8:$src2))]>,
+                    OpSize;
 def CMP16mi8 : Ii8<0x83, MRM7m,
                    (outs), (ins i16mem:$src1, i16i8imm:$src2),
                    "cmp{w}\t{$src2, $src1|$src1, $src2}",
-                   [(X86cmp (loadi16 addr:$src1), i16immSExt8:$src2),
-                    (implicit EFLAGS)]>, OpSize;
+                   [(set EFLAGS, (X86cmp (loadi16 addr:$src1),
+                                         i16immSExt8:$src2))]>, OpSize;
 def CMP32mi8 : Ii8<0x83, MRM7m,
                    (outs), (ins i32mem:$src1, i32i8imm:$src2),
                    "cmp{l}\t{$src2, $src1|$src1, $src2}",
-                   [(X86cmp (loadi32 addr:$src1), i32immSExt8:$src2),
-                    (implicit EFLAGS)]>;
+                   [(set EFLAGS, (X86cmp (loadi32 addr:$src1),
+                                         i32immSExt8:$src2))]>;
 def CMP32ri8 : Ii8<0x83, MRM7r,
                    (outs), (ins GR32:$src1, i32i8imm:$src2),
                    "cmp{l}\t{$src2, $src1|$src1, $src2}",
-                   [(X86cmp GR32:$src1, i32immSExt8:$src2),
-                    (implicit EFLAGS)]>;
+                   [(set EFLAGS, (X86cmp GR32:$src1, i32immSExt8:$src2))]>;
 } // Defs = [EFLAGS]
 
 // Bit tests.
@@ -3583,12 +3572,10 @@ def CMP32ri8 : Ii8<0x83, MRM7r,
 let Defs = [EFLAGS] in {
 def BT16rr : I<0xA3, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
                "bt{w}\t{$src2, $src1|$src1, $src2}",
-               [(X86bt GR16:$src1, GR16:$src2),
-                (implicit EFLAGS)]>, OpSize, TB;
+               [(set EFLAGS, (X86bt GR16:$src1, GR16:$src2))]>, OpSize, TB;
 def BT32rr : I<0xA3, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2),
                "bt{l}\t{$src2, $src1|$src1, $src2}",
-               [(X86bt GR32:$src1, GR32:$src2),
-                (implicit EFLAGS)]>, TB;
+               [(set EFLAGS, (X86bt GR32:$src1, GR32:$src2))]>, TB;
 
 // Unlike with the register+register form, the memory+register form of the
 // bt instruction does not ignore the high bits of the index. From ISel's
@@ -3610,23 +3597,22 @@ def BT32mr : I<0xA3, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
 
 def BT16ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR16:$src1, i16i8imm:$src2),
                 "bt{w}\t{$src2, $src1|$src1, $src2}",
-                [(X86bt GR16:$src1, i16immSExt8:$src2),
-                 (implicit EFLAGS)]>, OpSize, TB;
+                [(set EFLAGS, (X86bt GR16:$src1, i16immSExt8:$src2))]>,
+                OpSize, TB;
 def BT32ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR32:$src1, i32i8imm:$src2),
                 "bt{l}\t{$src2, $src1|$src1, $src2}",
-                [(X86bt GR32:$src1, i32immSExt8:$src2),
-                 (implicit EFLAGS)]>, TB;
+                [(set EFLAGS, (X86bt GR32:$src1, i32immSExt8:$src2))]>, TB;
 // Note that these instructions don't need FastBTMem because that
 // only applies when the other operand is in a register. When it's
 // an immediate, bt is still fast.
 def BT16mi8 : Ii8<0xBA, MRM4m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
                 "bt{w}\t{$src2, $src1|$src1, $src2}",
-                [(X86bt (loadi16 addr:$src1), i16immSExt8:$src2),
-                 (implicit EFLAGS)]>, OpSize, TB;
+                [(set EFLAGS, (X86bt (loadi16 addr:$src1), i16immSExt8:$src2))
+                 ]>, OpSize, TB;
 def BT32mi8 : Ii8<0xBA, MRM4m, (outs), (ins i32mem:$src1, i32i8imm:$src2),
                 "bt{l}\t{$src2, $src1|$src1, $src2}",
-                [(X86bt (loadi32 addr:$src1), i32immSExt8:$src2),
-                 (implicit EFLAGS)]>, TB;
+                [(set EFLAGS, (X86bt (loadi32 addr:$src1), i32immSExt8:$src2))
+                 ]>, TB;
 
 def BTC16rr : I<0xBB, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
                 "btc{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
@@ -4401,11 +4387,11 @@ def : Pat<(subc GR32:$src1, i32immSExt8:$src2),
 // Comparisons.
 
 // TEST R,R is smaller than CMP R,0
-def : Pat<(parallel (X86cmp GR8:$src1, 0), (implicit EFLAGS)),
+def : Pat<(X86cmp GR8:$src1, 0),
           (TEST8rr GR8:$src1, GR8:$src1)>;
-def : Pat<(parallel (X86cmp GR16:$src1, 0), (implicit EFLAGS)),
+def : Pat<(X86cmp GR16:$src1, 0),
           (TEST16rr GR16:$src1, GR16:$src1)>;
-def : Pat<(parallel (X86cmp GR32:$src1, 0), (implicit EFLAGS)),
+def : Pat<(X86cmp GR32:$src1, 0),
           (TEST32rr GR32:$src1, GR32:$src1)>;
 
 // Conditional moves with folded loads with operands swapped and conditions
@@ -4799,42 +4785,6 @@ def : Pat<(parallel (X86add_flag GR32:$src1, i32immSExt8:$src2),
                     (implicit EFLAGS)),
           (ADD32ri8 GR32:$src1, i32immSExt8:$src2)>;
 
-// Memory-Register Addition with EFLAGS result
-def : Pat<(parallel (store (X86add_flag (loadi8 addr:$dst), GR8:$src2),
-                           addr:$dst),
-                    (implicit EFLAGS)),
-          (ADD8mr addr:$dst, GR8:$src2)>;
-def : Pat<(parallel (store (X86add_flag (loadi16 addr:$dst), GR16:$src2),
-                           addr:$dst),
-                    (implicit EFLAGS)),
-          (ADD16mr addr:$dst, GR16:$src2)>;
-def : Pat<(parallel (store (X86add_flag (loadi32 addr:$dst), GR32:$src2),
-                           addr:$dst),
-                    (implicit EFLAGS)),
-          (ADD32mr addr:$dst, GR32:$src2)>;
-
-// Memory-Integer Addition with EFLAGS result
-def : Pat<(parallel (store (X86add_flag (loadi8 addr:$dst), imm:$src2),
-                           addr:$dst),
-                    (implicit EFLAGS)),
-          (ADD8mi addr:$dst, imm:$src2)>;
-def : Pat<(parallel (store (X86add_flag (loadi16 addr:$dst), imm:$src2),
-                           addr:$dst),
-                    (implicit EFLAGS)),
-          (ADD16mi addr:$dst, imm:$src2)>;
-def : Pat<(parallel (store (X86add_flag (loadi32 addr:$dst), imm:$src2),
-                           addr:$dst),
-                    (implicit EFLAGS)),
-          (ADD32mi addr:$dst, imm:$src2)>;
-def : Pat<(parallel (store (X86add_flag (loadi16 addr:$dst), i16immSExt8:$src2),
-                           addr:$dst),
-                    (implicit EFLAGS)),
-          (ADD16mi8 addr:$dst, i16immSExt8:$src2)>;
-def : Pat<(parallel (store (X86add_flag (loadi32 addr:$dst), i32immSExt8:$src2),
-                           addr:$dst),
-                    (implicit EFLAGS)),
-          (ADD32mi8 addr:$dst, i32immSExt8:$src2)>;
-
 // Register-Register Subtraction with EFLAGS result
 def : Pat<(parallel (X86sub_flag GR8:$src1, GR8:$src2),
                     (implicit EFLAGS)),
@@ -4874,43 +4824,6 @@ def : Pat<(parallel (X86sub_flag GR32:$src1, i32immSExt8:$src2),
                     (implicit EFLAGS)),
           (SUB32ri8 GR32:$src1, i32immSExt8:$src2)>;
 
-// Memory-Register Subtraction with EFLAGS result
-def : Pat<(parallel (store (X86sub_flag (loadi8 addr:$dst), GR8:$src2),
-                           addr:$dst),
-                    (implicit EFLAGS)),
-          (SUB8mr addr:$dst, GR8:$src2)>;
-def : Pat<(parallel (store (X86sub_flag (loadi16 addr:$dst), GR16:$src2),
-                           addr:$dst),
-                    (implicit EFLAGS)),
-          (SUB16mr addr:$dst, GR16:$src2)>;
-def : Pat<(parallel (store (X86sub_flag (loadi32 addr:$dst), GR32:$src2),
-                           addr:$dst),
-                    (implicit EFLAGS)),
-          (SUB32mr addr:$dst, GR32:$src2)>;
-
-// Memory-Integer Subtraction with EFLAGS result
-def : Pat<(parallel (store (X86sub_flag (loadi8 addr:$dst), imm:$src2),
-                           addr:$dst),
-                    (implicit EFLAGS)),
-          (SUB8mi addr:$dst, imm:$src2)>;
-def : Pat<(parallel (store (X86sub_flag (loadi16 addr:$dst), imm:$src2),
-                           addr:$dst),
-                    (implicit EFLAGS)),
-          (SUB16mi addr:$dst, imm:$src2)>;
-def : Pat<(parallel (store (X86sub_flag (loadi32 addr:$dst), imm:$src2),
-                           addr:$dst),
-                    (implicit EFLAGS)),
-          (SUB32mi addr:$dst, imm:$src2)>;
-def : Pat<(parallel (store (X86sub_flag (loadi16 addr:$dst), i16immSExt8:$src2),
-                           addr:$dst),
-                    (implicit EFLAGS)),
-          (SUB16mi8 addr:$dst, i16immSExt8:$src2)>;
-def : Pat<(parallel (store (X86sub_flag (loadi32 addr:$dst), i32immSExt8:$src2),
-                           addr:$dst),
-                    (implicit EFLAGS)),
-          (SUB32mi8 addr:$dst, i32immSExt8:$src2)>;
-
-
 // Register-Register Signed Integer Multiply with EFLAGS result
 def : Pat<(parallel (X86smul_flag GR16:$src1, GR16:$src2),
                     (implicit EFLAGS)),
@@ -4969,36 +4882,18 @@ def : Pat<(parallel (X86smul_flag GR32:$src1, 2),
 // INC and DEC with EFLAGS result. Note that these do not set CF.
 def : Pat<(parallel (X86inc_flag GR8:$src), (implicit EFLAGS)),
           (INC8r GR8:$src)>;
-def : Pat<(parallel (store (i8 (X86inc_flag (loadi8 addr:$dst))), addr:$dst),
-                    (implicit EFLAGS)),
-          (INC8m addr:$dst)>;
 def : Pat<(parallel (X86dec_flag GR8:$src), (implicit EFLAGS)),
           (DEC8r GR8:$src)>;
-def : Pat<(parallel (store (i8 (X86dec_flag (loadi8 addr:$dst))), addr:$dst),
-                    (implicit EFLAGS)),
-          (DEC8m addr:$dst)>;
 
 def : Pat<(parallel (X86inc_flag GR16:$src), (implicit EFLAGS)),
           (INC16r GR16:$src)>, Requires<[In32BitMode]>;
-def : Pat<(parallel (store (i16 (X86inc_flag (loadi16 addr:$dst))), addr:$dst),
-                    (implicit EFLAGS)),
-          (INC16m addr:$dst)>, Requires<[In32BitMode]>;
 def : Pat<(parallel (X86dec_flag GR16:$src), (implicit EFLAGS)),
           (DEC16r GR16:$src)>, Requires<[In32BitMode]>;
-def : Pat<(parallel (store (i16 (X86dec_flag (loadi16 addr:$dst))), addr:$dst),
-                    (implicit EFLAGS)),
-          (DEC16m addr:$dst)>, Requires<[In32BitMode]>;
 
 def : Pat<(parallel (X86inc_flag GR32:$src), (implicit EFLAGS)),
           (INC32r GR32:$src)>, Requires<[In32BitMode]>;
-def : Pat<(parallel (store (i32 (X86inc_flag (loadi32 addr:$dst))), addr:$dst),
-                    (implicit EFLAGS)),
-          (INC32m addr:$dst)>, Requires<[In32BitMode]>;
 def : Pat<(parallel (X86dec_flag GR32:$src), (implicit EFLAGS)),
           (DEC32r GR32:$src)>, Requires<[In32BitMode]>;
-def : Pat<(parallel (store (i32 (X86dec_flag (loadi32 addr:$dst))), addr:$dst),
-                    (implicit EFLAGS)),
-          (DEC32m addr:$dst)>, Requires<[In32BitMode]>;
 
 // Register-Register Or with EFLAGS result
 def : Pat<(parallel (X86or_flag GR8:$src1, GR8:$src2),
@@ -5039,42 +4934,6 @@ def : Pat<(parallel (X86or_flag GR32:$src1, i32immSExt8:$src2),
                     (implicit EFLAGS)),
           (OR32ri8 GR32:$src1, i32immSExt8:$src2)>;
 
-// Memory-Register Or with EFLAGS result
-def : Pat<(parallel (store (X86or_flag (loadi8 addr:$dst), GR8:$src2),
-                           addr:$dst),
-                    (implicit EFLAGS)),
-          (OR8mr addr:$dst, GR8:$src2)>;
-def : Pat<(parallel (store (X86or_flag (loadi16 addr:$dst), GR16:$src2),
-                           addr:$dst),
-                    (implicit EFLAGS)),
-          (OR16mr addr:$dst, GR16:$src2)>;
-def : Pat<(parallel (store (X86or_flag (loadi32 addr:$dst), GR32:$src2),
-                           addr:$dst),
-                    (implicit EFLAGS)),
-          (OR32mr addr:$dst, GR32:$src2)>;
-
-// Memory-Integer Or with EFLAGS result
-def : Pat<(parallel (store (X86or_flag (loadi8 addr:$dst), imm:$src2),
-                           addr:$dst),
-                    (implicit EFLAGS)),
-          (OR8mi addr:$dst, imm:$src2)>;
-def : Pat<(parallel (store (X86or_flag (loadi16 addr:$dst), imm:$src2),
-                           addr:$dst),
-                    (implicit EFLAGS)),
-          (OR16mi addr:$dst, imm:$src2)>;
-def : Pat<(parallel (store (X86or_flag (loadi32 addr:$dst), imm:$src2),
-                           addr:$dst),
-                    (implicit EFLAGS)),
-          (OR32mi addr:$dst, imm:$src2)>;
-def : Pat<(parallel (store (X86or_flag (loadi16 addr:$dst), i16immSExt8:$src2),
-                           addr:$dst),
-                    (implicit EFLAGS)),
-          (OR16mi8 addr:$dst, i16immSExt8:$src2)>;
-def : Pat<(parallel (store (X86or_flag (loadi32 addr:$dst), i32immSExt8:$src2),
-                           addr:$dst),
-                    (implicit EFLAGS)),
-          (OR32mi8 addr:$dst, i32immSExt8:$src2)>;
-
 // Register-Register XOr with EFLAGS result
 def : Pat<(parallel (X86xor_flag GR8:$src1, GR8:$src2),
                     (implicit EFLAGS)),
@@ -5114,42 +4973,6 @@ def : Pat<(parallel (X86xor_flag GR32:$src1, i32immSExt8:$src2),
                     (implicit EFLAGS)),
           (XOR32ri8 GR32:$src1, i32immSExt8:$src2)>;
 
-// Memory-Register XOr with EFLAGS result
-def : Pat<(parallel (store (X86xor_flag (loadi8 addr:$dst), GR8:$src2),
-                           addr:$dst),
-                    (implicit EFLAGS)),
-          (XOR8mr addr:$dst, GR8:$src2)>;
-def : Pat<(parallel (store (X86xor_flag (loadi16 addr:$dst), GR16:$src2),
-                           addr:$dst),
-                    (implicit EFLAGS)),
-          (XOR16mr addr:$dst, GR16:$src2)>;
-def : Pat<(parallel (store (X86xor_flag (loadi32 addr:$dst), GR32:$src2),
-                           addr:$dst),
-                    (implicit EFLAGS)),
-          (XOR32mr addr:$dst, GR32:$src2)>;
-
-// Memory-Integer XOr with EFLAGS result
-def : Pat<(parallel (store (X86xor_flag (loadi8 addr:$dst), imm:$src2),
-                           addr:$dst),
-                    (implicit EFLAGS)),
-          (XOR8mi addr:$dst, imm:$src2)>;
-def : Pat<(parallel (store (X86xor_flag (loadi16 addr:$dst), imm:$src2),
-                           addr:$dst),
-                    (implicit EFLAGS)),
-          (XOR16mi addr:$dst, imm:$src2)>;
-def : Pat<(parallel (store (X86xor_flag (loadi32 addr:$dst), imm:$src2),
-                           addr:$dst),
-                    (implicit EFLAGS)),
-          (XOR32mi addr:$dst, imm:$src2)>;
-def : Pat<(parallel (store (X86xor_flag (loadi16 addr:$dst), i16immSExt8:$src2),
-                           addr:$dst),
-                    (implicit EFLAGS)),
-          (XOR16mi8 addr:$dst, i16immSExt8:$src2)>;
-def : Pat<(parallel (store (X86xor_flag (loadi32 addr:$dst), i32immSExt8:$src2),
-                           addr:$dst),
-                    (implicit EFLAGS)),
-          (XOR32mi8 addr:$dst, i32immSExt8:$src2)>;
-
 // Register-Register And with EFLAGS result
 def : Pat<(parallel (X86and_flag GR8:$src1, GR8:$src2),
                     (implicit EFLAGS)),
@@ -5189,42 +5012,6 @@ def : Pat<(parallel (X86and_flag GR32:$src1, i32immSExt8:$src2),
                     (implicit EFLAGS)),
           (AND32ri8 GR32:$src1, i32immSExt8:$src2)>;
 
-// Memory-Register And with EFLAGS result
-def : Pat<(parallel (store (X86and_flag (loadi8 addr:$dst), GR8:$src2),
-                           addr:$dst),
-                    (implicit EFLAGS)),
-          (AND8mr addr:$dst, GR8:$src2)>;
-def : Pat<(parallel (store (X86and_flag (loadi16 addr:$dst), GR16:$src2),
-                           addr:$dst),
-                    (implicit EFLAGS)),
-          (AND16mr addr:$dst, GR16:$src2)>;
-def : Pat<(parallel (store (X86and_flag (loadi32 addr:$dst), GR32:$src2),
-                           addr:$dst),
-                    (implicit EFLAGS)),
-          (AND32mr addr:$dst, GR32:$src2)>;
-
-// Memory-Integer And with EFLAGS result
-def : Pat<(parallel (store (X86and_flag (loadi8 addr:$dst), imm:$src2),
-                           addr:$dst),
-                    (implicit EFLAGS)),
-          (AND8mi addr:$dst, imm:$src2)>;
-def : Pat<(parallel (store (X86and_flag (loadi16 addr:$dst), imm:$src2),
-                           addr:$dst),
-                    (implicit EFLAGS)),
-          (AND16mi addr:$dst, imm:$src2)>;
-def : Pat<(parallel (store (X86and_flag (loadi32 addr:$dst), imm:$src2),
-                           addr:$dst),
-                    (implicit EFLAGS)),
-          (AND32mi addr:$dst, imm:$src2)>;
-def : Pat<(parallel (store (X86and_flag (loadi16 addr:$dst), i16immSExt8:$src2),
-                           addr:$dst),
-                    (implicit EFLAGS)),
-          (AND16mi8 addr:$dst, i16immSExt8:$src2)>;
-def : Pat<(parallel (store (X86and_flag (loadi32 addr:$dst), i32immSExt8:$src2),
-                           addr:$dst),
-                    (implicit EFLAGS)),
-          (AND32mi8 addr:$dst, i32immSExt8:$src2)>;
-
 // -disable-16bit support.
 def : Pat<(truncstorei16 (i16 imm:$src), addr:$dst),
           (MOV16mi addr:$dst, imm:$src)>;
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 18f9e52..720b663 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -513,11 +513,10 @@ let mayLoad = 1 in
 let Defs = [EFLAGS] in {
 def UCOMISSrr: PSI<0x2E, MRMSrcReg, (outs), (ins FR32:$src1, FR32:$src2),
                    "ucomiss\t{$src2, $src1|$src1, $src2}",
-                   [(X86cmp FR32:$src1, FR32:$src2), (implicit EFLAGS)]>;
+                   [(set EFLAGS, (X86cmp FR32:$src1, FR32:$src2))]>;
 def UCOMISSrm: PSI<0x2E, MRMSrcMem, (outs), (ins FR32:$src1, f32mem:$src2),
                    "ucomiss\t{$src2, $src1|$src1, $src2}",
-                   [(X86cmp FR32:$src1, (loadf32 addr:$src2)),
-                    (implicit EFLAGS)]>;
+                   [(set EFLAGS, (X86cmp FR32:$src1, (loadf32 addr:$src2)))]>;
                     
 def COMISSrr: PSI<0x2F, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
                   "comiss\t{$src2, $src1|$src1, $src2}", []>;
@@ -546,21 +545,21 @@ let Constraints = "$src1 = $dst" in {
 let Defs = [EFLAGS] in {
 def Int_UCOMISSrr: PSI<0x2E, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
                        "ucomiss\t{$src2, $src1|$src1, $src2}",
-                       [(X86ucomi (v4f32 VR128:$src1), VR128:$src2),
-                        (implicit EFLAGS)]>;
+                       [(set EFLAGS, (X86ucomi (v4f32 VR128:$src1),
+                                               VR128:$src2))]>;
 def Int_UCOMISSrm: PSI<0x2E, MRMSrcMem, (outs),(ins VR128:$src1, f128mem:$src2),
                        "ucomiss\t{$src2, $src1|$src1, $src2}",
-                       [(X86ucomi (v4f32 VR128:$src1), (load addr:$src2)),
-                        (implicit EFLAGS)]>;
+                       [(set EFLAGS, (X86ucomi (v4f32 VR128:$src1),
+                                               (load addr:$src2)))]>;
 
 def Int_COMISSrr: PSI<0x2F, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
                       "comiss\t{$src2, $src1|$src1, $src2}",
-                      [(X86comi (v4f32 VR128:$src1), VR128:$src2),
-                       (implicit EFLAGS)]>;
+                      [(set EFLAGS, (X86comi (v4f32 VR128:$src1),
+                                             VR128:$src2))]>;
 def Int_COMISSrm: PSI<0x2F, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
                       "comiss\t{$src2, $src1|$src1, $src2}",
-                      [(X86comi (v4f32 VR128:$src1), (load addr:$src2)),
-                       (implicit EFLAGS)]>;
+                      [(set EFLAGS, (X86comi (v4f32 VR128:$src1),
+                                             (load addr:$src2)))]>;
 } // Defs = [EFLAGS]
 
 // Aliases of packed SSE1 instructions for scalar use. These all have names
@@ -1298,11 +1297,10 @@ let mayLoad = 1 in
 let Defs = [EFLAGS] in {
 def UCOMISDrr: PDI<0x2E, MRMSrcReg, (outs), (ins FR64:$src1, FR64:$src2),
                    "ucomisd\t{$src2, $src1|$src1, $src2}",
-                   [(X86cmp FR64:$src1, FR64:$src2), (implicit EFLAGS)]>;
+                   [(set EFLAGS, (X86cmp FR64:$src1, FR64:$src2))]>;
 def UCOMISDrm: PDI<0x2E, MRMSrcMem, (outs), (ins FR64:$src1, f64mem:$src2),
                    "ucomisd\t{$src2, $src1|$src1, $src2}",
-                   [(X86cmp FR64:$src1, (loadf64 addr:$src2)),
-                    (implicit EFLAGS)]>;
+                   [(set EFLAGS, (X86cmp FR64:$src1, (loadf64 addr:$src2)))]>;
 } // Defs = [EFLAGS]
 
 // Aliases to match intrinsics which expect XMM operand(s).
@@ -1324,21 +1322,21 @@ let Constraints = "$src1 = $dst" in {
 let Defs = [EFLAGS] in {
 def Int_UCOMISDrr: PDI<0x2E, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
                        "ucomisd\t{$src2, $src1|$src1, $src2}",
-                       [(X86ucomi (v2f64 VR128:$src1), (v2f64 VR128:$src2)),
-                        (implicit EFLAGS)]>;
+                       [(set EFLAGS, (X86ucomi (v2f64 VR128:$src1),
+                                               VR128:$src2))]>;
 def Int_UCOMISDrm: PDI<0x2E, MRMSrcMem, (outs),(ins VR128:$src1, f128mem:$src2),
                        "ucomisd\t{$src2, $src1|$src1, $src2}",
-                       [(X86ucomi (v2f64 VR128:$src1), (load addr:$src2)),
-                        (implicit EFLAGS)]>;
+                       [(set EFLAGS, (X86ucomi (v2f64 VR128:$src1),
+                                               (load addr:$src2)))]>;
 
 def Int_COMISDrr: PDI<0x2F, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
                       "comisd\t{$src2, $src1|$src1, $src2}",
-                      [(X86comi (v2f64 VR128:$src1), (v2f64 VR128:$src2)),
-                       (implicit EFLAGS)]>;
+                      [(set EFLAGS, (X86comi (v2f64 VR128:$src1),
+                                             VR128:$src2))]>;
 def Int_COMISDrm: PDI<0x2F, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
                       "comisd\t{$src2, $src1|$src1, $src2}",
-                      [(X86comi (v2f64 VR128:$src1), (load addr:$src2)),
-                       (implicit EFLAGS)]>;
+                      [(set EFLAGS, (X86comi (v2f64 VR128:$src1),
+                                             (load addr:$src2)))]>;
 } // Defs = [EFLAGS]
 
 // Aliases of packed SSE2 instructions for scalar use. These all have names
@@ -3825,54 +3823,65 @@ def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, (memop addr:$src2))),
 let Constraints = "$src1 = $dst" in {
   def CRC32m8  : SS42FI<0xF0, MRMSrcMem, (outs GR32:$dst),
                       (ins GR32:$src1, i8mem:$src2),
-                      "crc32 \t{$src2, $src1|$src1, $src2}",
+                      "crc32{b} \t{$src2, $src1|$src1, $src2}",
                        [(set GR32:$dst,
                          (int_x86_sse42_crc32_8 GR32:$src1,
-                         (load addr:$src2)))]>, OpSize;
+                         (load addr:$src2)))]>;
   def CRC32r8  : SS42FI<0xF0, MRMSrcReg, (outs GR32:$dst),
                       (ins GR32:$src1, GR8:$src2),
-                      "crc32 \t{$src2, $src1|$src1, $src2}",
+                      "crc32{b} \t{$src2, $src1|$src1, $src2}",
                        [(set GR32:$dst,
-                         (int_x86_sse42_crc32_8 GR32:$src1, GR8:$src2))]>,
-                         OpSize;
+                         (int_x86_sse42_crc32_8 GR32:$src1, GR8:$src2))]>;
   def CRC32m16  : SS42FI<0xF1, MRMSrcMem, (outs GR32:$dst),
                       (ins GR32:$src1, i16mem:$src2),
-                      "crc32 \t{$src2, $src1|$src1, $src2}",
+                      "crc32{w} \t{$src2, $src1|$src1, $src2}",
                        [(set GR32:$dst,
                          (int_x86_sse42_crc32_16 GR32:$src1,
                          (load addr:$src2)))]>,
                          OpSize;
   def CRC32r16  : SS42FI<0xF1, MRMSrcReg, (outs GR32:$dst),
                       (ins GR32:$src1, GR16:$src2),
-                      "crc32 \t{$src2, $src1|$src1, $src2}",
+                      "crc32{w} \t{$src2, $src1|$src1, $src2}",
                        [(set GR32:$dst,
                          (int_x86_sse42_crc32_16 GR32:$src1, GR16:$src2))]>,
                          OpSize;
   def CRC32m32  : SS42FI<0xF1, MRMSrcMem, (outs GR32:$dst),
                       (ins GR32:$src1, i32mem:$src2),
-                      "crc32 \t{$src2, $src1|$src1, $src2}",
+                      "crc32{l} \t{$src2, $src1|$src1, $src2}",
                        [(set GR32:$dst,
                          (int_x86_sse42_crc32_32 GR32:$src1,
-                         (load addr:$src2)))]>, OpSize;
+                         (load addr:$src2)))]>;
   def CRC32r32  : SS42FI<0xF1, MRMSrcReg, (outs GR32:$dst),
                       (ins GR32:$src1, GR32:$src2),
-                      "crc32 \t{$src2, $src1|$src1, $src2}",
+                      "crc32{l} \t{$src2, $src1|$src1, $src2}",
                        [(set GR32:$dst,
-                         (int_x86_sse42_crc32_32 GR32:$src1, GR32:$src2))]>,
-                         OpSize;
-  def CRC64m64  : SS42FI<0xF0, MRMSrcMem, (outs GR64:$dst),
+                         (int_x86_sse42_crc32_32 GR32:$src1, GR32:$src2))]>;
+  def CRC64m8  : SS42FI<0xF0, MRMSrcMem, (outs GR64:$dst),
+                      (ins GR64:$src1, i8mem:$src2),
+                      "crc32{b} \t{$src2, $src1|$src1, $src2}",
+                       [(set GR64:$dst,
+                         (int_x86_sse42_crc64_8 GR64:$src1,
+                         (load addr:$src2)))]>,
+                         REX_W;
+  def CRC64r8  : SS42FI<0xF0, MRMSrcReg, (outs GR64:$dst),
+                      (ins GR64:$src1, GR8:$src2),
+                      "crc32{b} \t{$src2, $src1|$src1, $src2}",
+                       [(set GR64:$dst,
+                         (int_x86_sse42_crc64_8 GR64:$src1, GR8:$src2))]>,
+                         REX_W;
+  def CRC64m64  : SS42FI<0xF1, MRMSrcMem, (outs GR64:$dst),
                       (ins GR64:$src1, i64mem:$src2),
-                      "crc32 \t{$src2, $src1|$src1, $src2}",
+                      "crc32{q} \t{$src2, $src1|$src1, $src2}",
                        [(set GR64:$dst,
-                         (int_x86_sse42_crc32_64 GR64:$src1,
+                         (int_x86_sse42_crc64_64 GR64:$src1,
                          (load addr:$src2)))]>,
-                         OpSize, REX_W;
-  def CRC64r64  : SS42FI<0xF0, MRMSrcReg, (outs GR64:$dst),
+                         REX_W;
+  def CRC64r64  : SS42FI<0xF1, MRMSrcReg, (outs GR64:$dst),
                       (ins GR64:$src1, GR64:$src2),
-                      "crc32 \t{$src2, $src1|$src1, $src2}",
+                      "crc32{q} \t{$src2, $src1|$src1, $src2}",
                        [(set GR64:$dst,
-                         (int_x86_sse42_crc32_64 GR64:$src1, GR64:$src2))]>,
-                         OpSize, REX_W;
+                         (int_x86_sse42_crc64_64 GR64:$src1, GR64:$src2))]>,
+                         REX_W;
 }
 
 // String/text processing instructions.
diff --git a/lib/Target/X86/X86MCCodeEmitter.cpp b/lib/Target/X86/X86MCCodeEmitter.cpp
index 3f18696..a9681e6 100644
--- a/lib/Target/X86/X86MCCodeEmitter.cpp
+++ b/lib/Target/X86/X86MCCodeEmitter.cpp
@@ -38,14 +38,15 @@ public:
   ~X86MCCodeEmitter() {}
 
   unsigned getNumFixupKinds() const {
-    return 3;
+    return 4;
   }
 
   const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const {
     const static MCFixupKindInfo Infos[] = {
-      { "reloc_pcrel_4byte", 0, 4 * 8 },
-      { "reloc_pcrel_1byte", 0, 1 * 8 },
-      { "reloc_riprel_4byte", 0, 4 * 8 }
+      { "reloc_pcrel_4byte", 0, 4 * 8, MCFixupKindInfo::FKF_IsPCRel },
+      { "reloc_pcrel_1byte", 0, 1 * 8, MCFixupKindInfo::FKF_IsPCRel },
+      { "reloc_riprel_4byte", 0, 4 * 8, MCFixupKindInfo::FKF_IsPCRel },
+      { "reloc_riprel_4byte_movq_load", 0, 4 * 8, MCFixupKindInfo::FKF_IsPCRel }
     };
     
     if (Kind < FirstTargetFixupKind)
@@ -165,7 +166,8 @@ EmitImmediate(const MCOperand &DispOp, unsigned Size, MCFixupKind FixupKind,
   // If the fixup is pc-relative, we need to bias the value to be relative to
   // the start of the field, not the end of the field.
   if (FixupKind == MCFixupKind(X86::reloc_pcrel_4byte) ||
-      FixupKind == MCFixupKind(X86::reloc_riprel_4byte))
+      FixupKind == MCFixupKind(X86::reloc_riprel_4byte) ||
+      FixupKind == MCFixupKind(X86::reloc_riprel_4byte_movq_load))
     ImmOffset -= 4;
   if (FixupKind == MCFixupKind(X86::reloc_pcrel_1byte))
     ImmOffset -= 1;
@@ -197,6 +199,15 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op,
            "Invalid rip-relative address");
     EmitByte(ModRMByte(0, RegOpcodeField, 5), CurByte, OS);
     
+    unsigned FixupKind = X86::reloc_riprel_4byte;
+    
+    // movq loads are handled with a special relocation form which allows the
+    // linker to eliminate some loads for GOT references which end up in the
+    // same linkage unit.
+    if (MI.getOpcode() == X86::MOV64rm ||
+        MI.getOpcode() == X86::MOV64rm_TC)
+      FixupKind = X86::reloc_riprel_4byte_movq_load;
+    
     // rip-relative addressing is actually relative to the *next* instruction.
     // Since an immediate can follow the mod/rm byte for an instruction, this
     // means that we need to bias the immediate field of the instruction with
@@ -204,7 +215,7 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op,
     // expression to emit.
     int ImmSize = X86II::hasImm(TSFlags) ? X86II::getSizeOfImm(TSFlags) : 0;
     
-    EmitImmediate(Disp, 4, MCFixupKind(X86::reloc_riprel_4byte),
+    EmitImmediate(Disp, 4, MCFixupKind(FixupKind),
                   CurByte, OS, Fixups, -ImmSize);
     return;
   }
@@ -269,7 +280,10 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op,
     // Emit the normal disp32 encoding.
     EmitByte(ModRMByte(2, RegOpcodeField, 4), CurByte, OS);
     ForceDisp32 = true;
-  } else if (Disp.getImm() == 0 && BaseReg != X86::EBP) {
+  } else if (Disp.getImm() == 0 &&
+             // Base reg can't be anything that ends up with '5' as the base
+             // reg, it is the magic [*] nomenclature that indicates no base.
+             BaseRegNo != N86::EBP) {
     // Emit no displacement ModR/M byte
     EmitByte(ModRMByte(0, RegOpcodeField, 4), CurByte, OS);
   } else if (isDisp8(Disp.getImm())) {
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index f907614..cd56816 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -366,12 +366,3 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS,
   if (StackAlignment)
     stackAlignment = StackAlignment;
 }
-
-bool X86Subtarget::enablePostRAScheduler(
-            CodeGenOpt::Level OptLevel,
-            TargetSubtarget::AntiDepBreakMode& Mode,
-            RegClassVector& CriticalPathRCs) const {
-  Mode = TargetSubtarget::ANTIDEP_CRITICAL;
-  CriticalPathRCs.clear();
-  return OptLevel >= CodeGenOpt::Aggressive;
-}
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index 50338d3..56220db 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -230,12 +230,6 @@ public:
   /// indicating the number of scheduling cycles of backscheduling that
   /// should be attempted.
   unsigned getSpecialAddressLatency() const;
-
-  /// enablePostRAScheduler - X86 target is enabling post-alloc scheduling
-  /// at 'More' optimization level.
-  bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
-                             TargetSubtarget::AntiDepBreakMode& Mode,
-                             RegClassVector& CriticalPathRCs) const;
 };
 
 } // End llvm namespace
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index 56ddaf8..f13e6f3 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -22,7 +22,7 @@
 #include "llvm/Target/TargetRegistry.h"
 using namespace llvm;
 
-static const MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
+static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
   Triple TheTriple(TT);
   switch (TheTriple.getOS()) {
   case Triple::Darwin:
diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td
index 46805d5..2e9a1e5 100644
--- a/lib/Target/XCore/XCoreInstrInfo.td
+++ b/lib/Target/XCore/XCoreInstrInfo.td
@@ -29,7 +29,8 @@ include "XCoreInstrFormats.td"
 // Call
 def SDT_XCoreBranchLink : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
 def XCoreBranchLink     : SDNode<"XCoreISD::BL",SDT_XCoreBranchLink,
-                            [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+                            [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag,
+                             SDNPVariadic]>;
 
 def XCoreRetsp       : SDNode<"XCoreISD::RETSP", SDTNone,
                          [SDNPHasChain, SDNPOptInFlag]>;
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index 7b1e9c0..d8e97a2 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -622,12 +622,12 @@ static bool AllUsesOfValueWillTrapIfNull(Value *V,
         return false;  // Storing the value.
       }
     } else if (CallInst *CI = dyn_cast<CallInst>(*UI)) {
-      if (CI->getOperand(0) != V) {
+      if (CI->getCalledValue() != V) {
         //cerr << "NONTRAPPING USE: " << **UI;
         return false;  // Not calling the ptr
       }
     } else if (InvokeInst *II = dyn_cast<InvokeInst>(*UI)) {
-      if (II->getOperand(0) != V) {
+      if (II->getCalledValue() != V) {
         //cerr << "NONTRAPPING USE: " << **UI;
         return false;  // Not calling the ptr
       }
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index bdb46eb..65f2e15 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -820,7 +820,7 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {
       
       // We cannot remove an invoke, because it would change the CFG, just
       // change the callee to a null pointer.
-      cast<InvokeInst>(OldCall)->setOperand(0,
+      cast<InvokeInst>(OldCall)->setCalledFunction(
                                     Constant::getNullValue(CalleeF->getType()));
       return 0;
     }
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index de93e9f..eb04d94 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -307,6 +307,10 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L,
       }
     }
   }
+
+  // The insertion point instruction may have been deleted; clear it out
+  // so that the rewriter doesn't trip over it later.
+  Rewriter.clearInsertPoint();
 }
 
 void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) {
diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
index 05027ae..22f3628 100644
--- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -1400,6 +1400,14 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
           setOnlyReadsMemory(F);
           setDoesNotThrow(F);
           setDoesNotCapture(F, 1);
+        } else if (Name == "strchr" ||
+                   Name == "strrchr") {
+          if (FTy->getNumParams() != 2 ||
+              !FTy->getParamType(0)->isPointerTy() ||
+              !FTy->getParamType(1)->isIntegerTy())
+            continue;
+          setOnlyReadsMemory(F);
+          setDoesNotThrow(F);
         } else if (Name == "strcpy" ||
                    Name == "stpcpy" ||
                    Name == "strcat" ||
@@ -1428,7 +1436,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
         } else if (Name == "strcmp" ||
                    Name == "strspn" ||
                    Name == "strncmp" ||
-                   Name ==" strcspn" ||
+                   Name == "strcspn" ||
                    Name == "strcoll" ||
                    Name == "strcasecmp" ||
                    Name == "strncasecmp") {
diff --git a/lib/VMCore/AsmWriter.cpp b/lib/VMCore/AsmWriter.cpp
index fd74241..0eb9f02 100644
--- a/lib/VMCore/AsmWriter.cpp
+++ b/lib/VMCore/AsmWriter.cpp
@@ -1027,6 +1027,15 @@ static void WriteConstantInt(raw_ostream &Out, const Constant *CV,
     return;
   }
 
+  if (const ConstantUnion *CU = dyn_cast<ConstantUnion>(CV)) {
+    Out << "{ ";
+    TypePrinter.print(CU->getOperand(0)->getType(), Out);
+    Out << ' ';
+    WriteAsOperandInternal(Out, CU->getOperand(0), &TypePrinter, Machine);
+    Out << " }";
+    return;
+  }
+  
   if (const ConstantVector *CP = dyn_cast<ConstantVector>(CV)) {
     const Type *ETy = CP->getType()->getElementType();
     assert(CP->getNumOperands() > 0 &&
diff --git a/lib/VMCore/Verifier.cpp b/lib/VMCore/Verifier.cpp
index 721e96a..f141382 100644
--- a/lib/VMCore/Verifier.cpp
+++ b/lib/VMCore/Verifier.cpp
@@ -1623,10 +1623,6 @@ void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) {
     MDNode *MD = cast<MDNode>(CI.getOperand(1));
     Assert1(MD->getNumOperands() == 1,
                 "invalid llvm.dbg.declare intrinsic call 2", &CI);
-    if (MD->getOperand(0))
-      if (Constant *C = dyn_cast<Constant>(MD->getOperand(0)))
-        Assert1(C && !isa<ConstantPointerNull>(C),
-                "invalid llvm.dbg.declare intrinsic call 3", &CI);
   } break;
   case Intrinsic::memcpy:
   case Intrinsic::memmove:
diff --git a/test/CodeGen/ARM/2009-10-27-double-align.ll b/test/CodeGen/ARM/2009-10-27-double-align.ll
index a4e7685..f17d059 100644
--- a/test/CodeGen/ARM/2009-10-27-double-align.ll
+++ b/test/CodeGen/ARM/2009-10-27-double-align.ll
@@ -4,8 +4,8 @@
 
 define arm_aapcscc void @g() {
 entry:
-;CHECK: [sp, #+8]
-;CHECK: [sp, #+12]
+;CHECK: [sp, #8]
+;CHECK: [sp, #12]
 ;CHECK: [sp]
         tail call arm_aapcscc  void (i8*, ...)* @f(i8* getelementptr ([1 x i8]* @.str, i32 0, i32 0), i32 1, double 2.000000e+00, i32 3, double 4.000000e+00)
         ret void
diff --git a/test/CodeGen/ARM/2009-10-30.ll b/test/CodeGen/ARM/2009-10-30.ll
index 90a5bd2..87d1a8b 100644
--- a/test/CodeGen/ARM/2009-10-30.ll
+++ b/test/CodeGen/ARM/2009-10-30.ll
@@ -6,7 +6,7 @@ define void @f(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, ...) {
 entry:
 ;CHECK: sub	sp, sp, #4
 ;CHECK: add	r{{[0-9]+}}, sp, #8
-;CHECK: str	r{{[0-9]+}}, [sp], #+4
+;CHECK: str	r{{[0-9]+}}, [sp], #4
 ;CHECK: bx	lr
 	%ap = alloca i8*, align 4
 	%ap1 = bitcast i8** %ap to i8*
diff --git a/test/CodeGen/ARM/2010-03-18-ldm-rtrn.ll b/test/CodeGen/ARM/2010-03-18-ldm-rtrn.ll
new file mode 100644
index 0000000..31525ef
--- /dev/null
+++ b/test/CodeGen/ARM/2010-03-18-ldm-rtrn.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -mtriple=armv4-unknown-eabi | FileCheck %s
+; RUN: llc < %s -mtriple=armv5-unknown-eabi | FileCheck %s
+; RUN: llc < %s -mtriple=armv6-unknown-eabi | FileCheck %s
+
+define i32 @bar(i32 %a) nounwind {
+entry:
+  %0 = tail call i32 @foo(i32 %a) nounwind ; <i32> [#uses=1]
+  %1 = add nsw i32 %0, 3                          ; <i32> [#uses=1]
+; CHECK: ldmia	sp!, {r11, pc}
+  ret i32 %1
+}
+
+declare i32 @foo(i32)
diff --git a/test/CodeGen/ARM/arm-negative-stride.ll b/test/CodeGen/ARM/arm-negative-stride.ll
index 52ab871..fb0f8ff 100644
--- a/test/CodeGen/ARM/arm-negative-stride.ll
+++ b/test/CodeGen/ARM/arm-negative-stride.ll
@@ -5,7 +5,7 @@
 
 define void @test(i32* %P, i32 %A, i32 %i) nounwind {
 entry:
-; CHECK: str r1, [{{r.*}}, +{{r.*}}, lsl #2]
+; CHECK: str r1, [{{r.*}}, {{r.*}}, lsl #2]
         icmp eq i32 %i, 0               ; <i1>:0 [#uses=1]
         br i1 %0, label %return, label %bb
 
diff --git a/test/CodeGen/ARM/globals.ll b/test/CodeGen/ARM/globals.ll
index 886c0d5..adb4497 100644
--- a/test/CodeGen/ARM/globals.ll
+++ b/test/CodeGen/ARM/globals.ll
@@ -41,7 +41,7 @@ define i32 @test1() {
 ; DarwinPIC: _test1:
 ; DarwinPIC: 	ldr r0, LCPI1_0
 ; DarwinPIC: LPC1_0:
-; DarwinPIC:    ldr r0, [pc, +r0]
+; DarwinPIC:    ldr r0, [pc, r0]
 ; DarwinPIC:    ldr r0, [r0]
 ; DarwinPIC:    bx lr
 
@@ -63,7 +63,7 @@ define i32 @test1() {
 	
 ; LinuxPIC: .LPC1_0:
 ; LinuxPIC: 	add r0, pc, r0
-; LinuxPIC: 	ldr r0, [r1, +r0]
+; LinuxPIC: 	ldr r0, [r1, r0]
 ; LinuxPIC: 	ldr r0, [r0]
 ; LinuxPIC: 	bx lr
 
diff --git a/test/CodeGen/ARM/ifcvt5.ll b/test/CodeGen/ARM/ifcvt5.ll
index 623f2cb..8677ce5 100644
--- a/test/CodeGen/ARM/ifcvt5.ll
+++ b/test/CodeGen/ARM/ifcvt5.ll
@@ -11,7 +11,7 @@ entry:
 
 define void @t1(i32 %a, i32 %b) {
 ; CHECK: t1:
-; CHECK: ldmfdlt sp!, {r7, pc}
+; CHECK: ldmialt sp!, {r7, pc}
 entry:
 	%tmp1 = icmp sgt i32 %a, 10		; <i1> [#uses=1]
 	br i1 %tmp1, label %cond_true, label %UnifiedReturnBlock
diff --git a/test/CodeGen/ARM/ifcvt6.ll b/test/CodeGen/ARM/ifcvt6.ll
index d7fcf7d..342208b 100644
--- a/test/CodeGen/ARM/ifcvt6.ll
+++ b/test/CodeGen/ARM/ifcvt6.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | \
 ; RUN:   grep cmpne | count 1
 ; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | \
-; RUN:   grep ldmfdhi | count 1
+; RUN:   grep ldmiahi | count 1
 
 define void @foo(i32 %X, i32 %Y) {
 entry:
diff --git a/test/CodeGen/ARM/ifcvt7.ll b/test/CodeGen/ARM/ifcvt7.ll
index c60ad93..eb97085 100644
--- a/test/CodeGen/ARM/ifcvt7.ll
+++ b/test/CodeGen/ARM/ifcvt7.ll
@@ -3,7 +3,7 @@
 ; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | \
 ; RUN:   grep moveq | count 1
 ; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | \
-; RUN:   grep ldmfdeq | count 1
+; RUN:   grep ldmiaeq | count 1
 ; FIXME: Need post-ifcvt branch folding to get rid of the extra br at end of BB1.
 
 	%struct.quad_struct = type { i32, i32, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct* }
diff --git a/test/CodeGen/ARM/ifcvt8.ll b/test/CodeGen/ARM/ifcvt8.ll
index a7da834..1e39060 100644
--- a/test/CodeGen/ARM/ifcvt8.ll
+++ b/test/CodeGen/ARM/ifcvt8.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | \
-; RUN:   grep ldmfdne | count 1
+; RUN:   grep ldmiane | count 1
 
 	%struct.SString = type { i8*, i32, i32 }
 
diff --git a/test/CodeGen/ARM/ldm.ll b/test/CodeGen/ARM/ldm.ll
index 1a016a0..9a2dc82 100644
--- a/test/CodeGen/ARM/ldm.ll
+++ b/test/CodeGen/ARM/ldm.ll
@@ -24,7 +24,7 @@ define i32 @t2() {
 define i32 @t3() {
 ; CHECK: t3:
 ; CHECK: ldmib
-; CHECK: ldmfd sp!
+; CHECK: ldmia sp!
         %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 1)            ; <i32> [#uses=1]
         %tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 2)           ; <i32> [#uses=1]
         %tmp5 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 3)           ; <i32> [#uses=1]
diff --git a/test/CodeGen/ARM/ldrd.ll b/test/CodeGen/ARM/ldrd.ll
index c366e2d..895562a 100644
--- a/test/CodeGen/ARM/ldrd.ll
+++ b/test/CodeGen/ARM/ldrd.ll
@@ -10,10 +10,10 @@ entry:
 ;V6:   ldrd r2, [r2]
 
 ;V5:   ldr r3, [r2]
-;V5:   ldr r2, [r2, #+4]
+;V5:   ldr r2, [r2, #4]
 
 ;EABI: ldr r3, [r2]
-;EABI: ldr r2, [r2, #+4]
+;EABI: ldr r2, [r2, #4]
 
 	%0 = load i64** @b, align 4
 	%1 = load i64* %0, align 4
diff --git a/test/CodeGen/ARM/str_pre-2.ll b/test/CodeGen/ARM/str_pre-2.ll
index f8d3df2..553cd64 100644
--- a/test/CodeGen/ARM/str_pre-2.ll
+++ b/test/CodeGen/ARM/str_pre-2.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -mtriple=arm-linux-gnu | grep {str.*\\!}
-; RUN: llc < %s -mtriple=arm-linux-gnu | grep {ldr.*\\\[.*\], #+4}
+; RUN: llc < %s -mtriple=arm-linux-gnu | grep {ldr.*\\\[.*\], #4}
 
 @b = external global i64*
 
diff --git a/test/CodeGen/ARM/tls2.ll b/test/CodeGen/ARM/tls2.ll
index d932f90..57370c4 100644
--- a/test/CodeGen/ARM/tls2.ll
+++ b/test/CodeGen/ARM/tls2.ll
@@ -7,7 +7,7 @@
 
 define i32 @f() {
 ; CHECK-NONPIC: f:
-; CHECK-NONPIC: ldr {{r.}}, [pc, +{{r.}}]
+; CHECK-NONPIC: ldr {{r.}}, [pc, {{r.}}]
 ; CHECK-NONPIC: i(gottpoff)
 ; CHECK-PIC: f:
 ; CHECK-PIC: __tls_get_addr
@@ -18,7 +18,7 @@ entry:
 
 define i32* @g() {
 ; CHECK-NONPIC: g:
-; CHECK-NONPIC: ldr {{r.}}, [pc, +{{r.}}]
+; CHECK-NONPIC: ldr {{r.}}, [pc, {{r.}}]
 ; CHECK-NONPIC: i(gottpoff)
 ; CHECK-PIC: g:
 ; CHECK-PIC: __tls_get_addr
diff --git a/test/CodeGen/Generic/dbg_value.ll b/test/CodeGen/Generic/dbg_value.ll
new file mode 100644
index 0000000..ce3364d
--- /dev/null
+++ b/test/CodeGen/Generic/dbg_value.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s
+; rdar://7759395
+
+%0 = type { i32, i32 }
+
+define void @t(%0*, i32, i32, i32, i32) nounwind {
+  tail call void @llvm.dbg.value(metadata !{%0* %0}, i64 0, metadata !0)
+  unreachable
+}
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+!0 = metadata !{i32 0} ;
diff --git a/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll b/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll
index b4b6ed9..bfb7f6e 100644
--- a/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll
+++ b/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll
@@ -12,10 +12,10 @@
 define weak arm_aapcs_vfpcc i32 @_ZNKSs7compareERKSs(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %this, %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %__str) {
 ; CHECK: _ZNKSs7compareERKSs:
 ; CHECK:	it ne
-; CHECK-NEXT: ldmfdne.w
+; CHECK-NEXT: ldmiane.w
 ; CHECK-NEXT: itt eq
 ; CHECK-NEXT: subeq.w
-; CHECK-NEXT: ldmfdeq.w
+; CHECK-NEXT: ldmiaeq.w
 entry:
   %0 = tail call arm_aapcs_vfpcc  i32 @_ZNKSs4sizeEv(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %this) ; <i32> [#uses=3]
   %1 = tail call arm_aapcs_vfpcc  i32 @_ZNKSs4sizeEv(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %__str) ; <i32> [#uses=3]
diff --git a/test/CodeGen/Thumb2/large-stack.ll b/test/CodeGen/Thumb2/large-stack.ll
index fe0e506..9729534 100644
--- a/test/CodeGen/Thumb2/large-stack.ll
+++ b/test/CodeGen/Thumb2/large-stack.ll
@@ -27,7 +27,7 @@ define i32 @test3() {
 ; DARWIN: sub.w sp, sp, #805306368
 ; DARWIN: sub sp, #20
 ; LINUX: test3:
-; LINUX: stmfd   sp!, {r4, r7, r11, lr}
+; LINUX: stmdb   sp!, {r4, r7, r11, lr}
 ; LINUX: sub.w sp, sp, #805306368
 ; LINUX: sub sp, #16
     %retval = alloca i32, align 4
diff --git a/test/CodeGen/Thumb2/ldr-str-imm12.ll b/test/CodeGen/Thumb2/ldr-str-imm12.ll
index f007b5c..55cdac9 100644
--- a/test/CodeGen/Thumb2/ldr-str-imm12.ll
+++ b/test/CodeGen/Thumb2/ldr-str-imm12.ll
@@ -22,7 +22,7 @@
 
 define arm_apcscc %union.rec* @Manifest(%union.rec* %x, %union.rec* %env, %struct.STYLE* %style, %union.rec** %bthr, %union.rec** %fthr, %union.rec** %target, %union.rec** %crs, i32 %ok, i32 %need_expand, %union.rec** %enclose, i32 %fcr) nounwind {
 entry:
-; CHECK:       ldr.w	r9, [r7, #+28]
+; CHECK:       ldr.w	r9, [r7, #28]
   %xgaps.i = alloca [32 x %union.rec*], align 4   ; <[32 x %union.rec*]*> [#uses=0]
   %ycomp.i = alloca [32 x %union.rec*], align 4   ; <[32 x %union.rec*]*> [#uses=0]
   br i1 false, label %bb, label %bb20
@@ -50,9 +50,9 @@ bb119:                                            ; preds = %bb20, %bb20
 bb420:                                            ; preds = %bb20, %bb20
 ; CHECK: bb420
 ; CHECK: str r{{[0-7]}}, [sp]
-; CHECK: str r{{[0-7]}}, [sp, #+4]
-; CHECK: str r{{[0-7]}}, [sp, #+8]
-; CHECK: str{{(.w)?}} r{{[0-9]+}}, [sp, #+24]
+; CHECK: str r{{[0-7]}}, [sp, #4]
+; CHECK: str r{{[0-7]}}, [sp, #8]
+; CHECK: str{{(.w)?}} r{{[0-9]+}}, [sp, #24]
   store %union.rec* null, %union.rec** @zz_hold, align 4
   store %union.rec* null, %union.rec** @zz_res, align 4
   store %union.rec* %x, %union.rec** @zz_hold, align 4
diff --git a/test/CodeGen/Thumb2/thumb2-ldr.ll b/test/CodeGen/Thumb2/thumb2-ldr.ll
index 94888fd..88434f1 100644
--- a/test/CodeGen/Thumb2/thumb2-ldr.ll
+++ b/test/CodeGen/Thumb2/thumb2-ldr.ll
@@ -11,7 +11,7 @@ entry:
 define i32 @f2(i32* %v) {
 entry:
 ; CHECK: f2:
-; CHECK: ldr.w r0, [r0, #+4092]
+; CHECK: ldr.w r0, [r0, #4092]
         %tmp2 = getelementptr i32* %v, i32 1023
         %tmp = load i32* %tmp2
         ret i32 %tmp
diff --git a/test/CodeGen/Thumb2/thumb2-ldrh.ll b/test/CodeGen/Thumb2/thumb2-ldrh.ll
index f1fb79c..fee97bf 100644
--- a/test/CodeGen/Thumb2/thumb2-ldrh.ll
+++ b/test/CodeGen/Thumb2/thumb2-ldrh.ll
@@ -11,7 +11,7 @@ entry:
 define i16 @f2(i16* %v) {
 entry:
 ; CHECK: f2:
-; CHECK: ldrh.w r0, [r0, #+2046]
+; CHECK: ldrh.w r0, [r0, #2046]
         %tmp2 = getelementptr i16* %v, i16 1023
         %tmp = load i16* %tmp2
         ret i16 %tmp
diff --git a/test/CodeGen/Thumb2/thumb2-str.ll b/test/CodeGen/Thumb2/thumb2-str.ll
index 3eeec8c..11bb936 100644
--- a/test/CodeGen/Thumb2/thumb2-str.ll
+++ b/test/CodeGen/Thumb2/thumb2-str.ll
@@ -9,7 +9,7 @@ define i32 @f1(i32 %a, i32* %v) {
 
 define i32 @f2(i32 %a, i32* %v) {
 ; CHECK: f2:
-; CHECK: str.w r0, [r1, #+4092]
+; CHECK: str.w r0, [r1, #4092]
         %tmp2 = getelementptr i32* %v, i32 1023
         store i32 %a, i32* %tmp2
         ret i32 %a
diff --git a/test/CodeGen/Thumb2/thumb2-str_pre.ll b/test/CodeGen/Thumb2/thumb2-str_pre.ll
index 9af960b..1e6616a 100644
--- a/test/CodeGen/Thumb2/thumb2-str_pre.ll
+++ b/test/CodeGen/Thumb2/thumb2-str_pre.ll
@@ -2,7 +2,7 @@
 
 define void @test1(i32* %X, i32* %A, i32** %dest) {
 ; CHECK: test1
-; CHECK: str  r1, [r0, #+16]!
+; CHECK: str  r1, [r0, #16]!
         %B = load i32* %A               ; <i32> [#uses=1]
         %Y = getelementptr i32* %X, i32 4               ; <i32*> [#uses=2]
         store i32 %B, i32* %Y
@@ -12,7 +12,7 @@ define void @test1(i32* %X, i32* %A, i32** %dest) {
 
 define i16* @test2(i16* %X, i32* %A) {
 ; CHECK: test2
-; CHECK: strh r1, [r0, #+8]!
+; CHECK: strh r1, [r0, #8]!
         %B = load i32* %A               ; <i32> [#uses=1]
         %Y = getelementptr i16* %X, i32 4               ; <i16*> [#uses=2]
         %tmp = trunc i32 %B to i16              ; <i16> [#uses=1]
diff --git a/test/CodeGen/Thumb2/thumb2-strb.ll b/test/CodeGen/Thumb2/thumb2-strb.ll
index 1ebb938..7978e7f 100644
--- a/test/CodeGen/Thumb2/thumb2-strb.ll
+++ b/test/CodeGen/Thumb2/thumb2-strb.ll
@@ -9,7 +9,7 @@ define i8 @f1(i8 %a, i8* %v) {
 
 define i8 @f2(i8 %a, i8* %v) {
 ; CHECK: f2:
-; CHECK: strb.w r0, [r1, #+4092]
+; CHECK: strb.w r0, [r1, #4092]
         %tmp2 = getelementptr i8* %v, i32 4092
         store i8 %a, i8* %tmp2
         ret i8 %a
diff --git a/test/CodeGen/Thumb2/thumb2-strh.ll b/test/CodeGen/Thumb2/thumb2-strh.ll
index b0eb8c1..97110a7 100644
--- a/test/CodeGen/Thumb2/thumb2-strh.ll
+++ b/test/CodeGen/Thumb2/thumb2-strh.ll
@@ -9,7 +9,7 @@ define i16 @f1(i16 %a, i16* %v) {
 
 define i16 @f2(i16 %a, i16* %v) {
 ; CHECK: f2:
-; CHECK: strh.w r0, [r1, #+4092]
+; CHECK: strh.w r0, [r1, #4092]
         %tmp2 = getelementptr i16* %v, i32 2046
         store i16 %a, i16* %tmp2
         ret i16 %a
diff --git a/test/CodeGen/X86/2007-01-08-InstrSched.ll b/test/CodeGen/X86/2007-01-08-InstrSched.ll
index 317ed0a..58e186b 100644
--- a/test/CodeGen/X86/2007-01-08-InstrSched.ll
+++ b/test/CodeGen/X86/2007-01-08-InstrSched.ll
@@ -11,12 +11,12 @@ define float @foo(float %x) nounwind {
     %tmp14 = fadd float %tmp12, %tmp7
     ret float %tmp14
 
-; CHECK:      mulss	LCPI1_3(%rip)
-; CHECK-NEXT: mulss	LCPI1_0(%rip)
-; CHECK-NEXT: mulss	LCPI1_1(%rip)
-; CHECK-NEXT: mulss	LCPI1_2(%rip)
-; CHECK-NEXT: addss
-; CHECK-NEXT: addss
-; CHECK-NEXT: addss
-; CHECK-NEXT: ret
+; CHECK: mulss	LCPI1_0(%rip)
+; CHECK: mulss	LCPI1_1(%rip)
+; CHECK: addss
+; CHECK: mulss	LCPI1_2(%rip)
+; CHECK: addss
+; CHECK: mulss	LCPI1_3(%rip)
+; CHECK: addss
+; CHECK: ret
 }
diff --git a/test/CodeGen/X86/2010-03-17-ISelBug.ll b/test/CodeGen/X86/2010-03-17-ISelBug.ll
new file mode 100644
index 0000000..609b4e2
--- /dev/null
+++ b/test/CodeGen/X86/2010-03-17-ISelBug.ll
@@ -0,0 +1,39 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin5
+; rdar://7761790
+
+%"struct..0$_485" = type { i16, i16, i32 }
+%union.PPToken = type { %"struct..0$_485" }
+%struct.PPOperation = type { %union.PPToken, %union.PPToken, [6 x %union.PPToken], i32, i32, i32, [1 x i32], [0 x i8] }
+
+define i32* @t() align 2 nounwind {
+entry:
+  %operation = alloca %struct.PPOperation, align 8 ; <%struct.PPOperation*> [#uses=2]
+  %0 = load i32*** null, align 4  ; [#uses=1]
+  %1 = ptrtoint i32** %0 to i32   ; <i32> [#uses=1]
+  %2 = sub nsw i32 %1, undef                      ; <i32> [#uses=2]
+  br i1 false, label %bb20, label %bb.nph380
+
+bb20:                                             ; preds = %entry
+  ret i32* null
+
+bb.nph380:                                        ; preds = %entry
+  %scevgep403 = getelementptr %struct.PPOperation* %operation, i32 0, i32 1, i32 0, i32 2 ; <i32*> [#uses=1]
+  %3 = ashr i32 %2, 1                             ; <i32> [#uses=1]
+  %tmp405 = and i32 %3, -2                        ; <i32> [#uses=1]
+  %scevgep408 = getelementptr %struct.PPOperation* %operation, i32 0, i32 1, i32 0, i32 1 ; <i16*> [#uses=1]
+  %tmp410 = and i32 %2, -4                        ; <i32> [#uses=1]
+  br label %bb169
+
+bb169:                                            ; preds = %bb169, %bb.nph380
+  %index.6379 = phi i32 [ 0, %bb.nph380 ], [ %4, %bb169 ] ; <i32> [#uses=3]
+  %tmp404 = mul i32 %index.6379, -2               ; <i32> [#uses=1]
+  %tmp406 = add i32 %tmp405, %tmp404              ; <i32> [#uses=1]
+  %scevgep407 = getelementptr i32* %scevgep403, i32 %tmp406 ; <i32*> [#uses=1]
+  %tmp409 = mul i32 %index.6379, -4               ; <i32> [#uses=1]
+  %tmp411 = add i32 %tmp410, %tmp409              ; <i32> [#uses=1]
+  %scevgep412 = getelementptr i16* %scevgep408, i32 %tmp411 ; <i16*> [#uses=1]
+  store i16 undef, i16* %scevgep412, align 2
+  store i32 undef, i32* %scevgep407, align 4
+  %4 = add nsw i32 %index.6379, 1                 ; <i32> [#uses=1]
+  br label %bb169
+}
diff --git a/test/CodeGen/X86/gather-addresses.ll b/test/CodeGen/X86/gather-addresses.ll
index 0719838..3e730de 100644
--- a/test/CodeGen/X86/gather-addresses.ll
+++ b/test/CodeGen/X86/gather-addresses.ll
@@ -1,4 +1,5 @@
 ; RUN: llc -march=x86-64 < %s | FileCheck %s
+; rdar://7398554
 
 ; When doing vector gather-scatter index calculation with 32-bit indices,
 ; bounce the vector off of cache rather than shuffling each individual
diff --git a/test/CodeGen/X86/lsr-reuse.ll b/test/CodeGen/X86/lsr-reuse.ll
index 2f6fb3f..ab71555 100644
--- a/test/CodeGen/X86/lsr-reuse.ll
+++ b/test/CodeGen/X86/lsr-reuse.ll
@@ -8,10 +8,10 @@ target triple = "x86_64-unknown-unknown"
 
 ; CHECK: full_me_0:
 ; CHECK: movsd   (%rsi), %xmm0
-; CHECK: addq    $8, %rsi
 ; CHECK: mulsd   (%rdx), %xmm0
-; CHECK: addq    $8, %rdx
 ; CHECK: movsd   %xmm0, (%rdi)
+; CHECK: addq    $8, %rsi
+; CHECK: addq    $8, %rdx
 ; CHECK: addq    $8, %rdi
 ; CHECK: decq    %rcx
 ; CHECK: jne
@@ -53,10 +53,10 @@ return:
 ; CHECK: mulsd   -2048(%rdx), %xmm0
 ; CHECK: movsd   %xmm0, -2048(%rdi)
 ; CHECK: movsd   (%rsi), %xmm0
-; CHECK: addq    $8, %rsi
 ; CHECK: divsd   (%rdx), %xmm0
-; CHECK: addq    $8, %rdx
 ; CHECK: movsd   %xmm0, (%rdi)
+; CHECK: addq    $8, %rsi
+; CHECK: addq    $8, %rdx
 ; CHECK: addq    $8, %rdi
 ; CHECK: decq    %rcx
 ; CHECK: jne
@@ -99,10 +99,10 @@ return:
 ; CHECK: mulsd   (%rdx), %xmm0
 ; CHECK: movsd   %xmm0, (%rdi)
 ; CHECK: movsd   -2048(%rsi), %xmm0
-; CHECK: addq    $8, %rsi
 ; CHECK: divsd   -2048(%rdx), %xmm0
-; CHECK: addq    $8, %rdx
 ; CHECK: movsd   %xmm0, -2048(%rdi)
+; CHECK: addq    $8, %rsi
+; CHECK: addq    $8, %rdx
 ; CHECK: addq    $8, %rdi
 ; CHECK: decq    %rcx
 ; CHECK: jne
@@ -144,10 +144,10 @@ return:
 ; CHECK: mulsd   (%rdx), %xmm0
 ; CHECK: movsd   %xmm0, (%rdi)
 ; CHECK: movsd   -4096(%rsi), %xmm0
-; CHECK: addq    $8, %rsi
 ; CHECK: divsd   -4096(%rdx), %xmm0
-; CHECK: addq    $8, %rdx
 ; CHECK: movsd   %xmm0, -4096(%rdi)
+; CHECK: addq    $8, %rsi
+; CHECK: addq    $8, %rdx
 ; CHECK: addq    $8, %rdi
 ; CHECK: decq    %rcx
 ; CHECK: jne
@@ -310,10 +310,10 @@ return:
 ; CHECK: addsd   (%rsi), %xmm0
 ; CHECK: movsd   %xmm0, (%rdx)
 ; CHECK: movsd   40(%rdi), %xmm0
-; CHECK: addq    $8, %rdi
 ; CHECK: subsd   40(%rsi), %xmm0
-; CHECK: addq    $8, %rsi
 ; CHECK: movsd   %xmm0, 40(%rdx)
+; CHECK: addq    $8, %rdi
+; CHECK: addq    $8, %rsi
 ; CHECK: addq    $8, %rdx
 ; CHECK: decq    %rcx
 ; CHECK: jne
diff --git a/test/CodeGen/X86/sibcall.ll b/test/CodeGen/X86/sibcall.ll
index 90315fd..ce35b45 100644
--- a/test/CodeGen/X86/sibcall.ll
+++ b/test/CodeGen/X86/sibcall.ll
@@ -57,11 +57,11 @@ define void @t5(void ()* nocapture %x) nounwind ssp {
 entry:
 ; 32: t5:
 ; 32-NOT: call
-; 32: jmpl *
+; 32: jmpl *4(%esp)
 
 ; 64: t5:
 ; 64-NOT: call
-; 64: jmpq *
+; 64: jmpq *%rdi
   tail call void %x() nounwind
   ret void
 }
@@ -215,4 +215,59 @@ entry:
   ret %struct.ns* %0
 }
 
+; rdar://6195379
+; llvm can't do sibcall for this in 32-bit mode (yet).
 declare fastcc %struct.ns* @foo7(%struct.cp* byval align 4, i8 signext) nounwind ssp
+
+%struct.__block_descriptor = type { i64, i64 }
+%struct.__block_descriptor_withcopydispose = type { i64, i64, i8*, i8* }
+%struct.__block_literal_1 = type { i8*, i32, i32, i8*, %struct.__block_descriptor* }
+%struct.__block_literal_2 = type { i8*, i32, i32, i8*, %struct.__block_descriptor_withcopydispose*, void ()* }
+
+define void @t14(%struct.__block_literal_2* nocapture %.block_descriptor) nounwind ssp {
+entry:
+; 64: t14:
+; 64: movq 32(%rdi)
+; 64-NOT: movq 16(%rdi)
+; 64: jmpq *16(%rdi)
+  %0 = getelementptr inbounds %struct.__block_literal_2* %.block_descriptor, i64 0, i32 5 ; <void ()**> [#uses=1]
+  %1 = load void ()** %0, align 8                 ; <void ()*> [#uses=2]
+  %2 = bitcast void ()* %1 to %struct.__block_literal_1* ; <%struct.__block_literal_1*> [#uses=1]
+  %3 = getelementptr inbounds %struct.__block_literal_1* %2, i64 0, i32 3 ; <i8**> [#uses=1]
+  %4 = load i8** %3, align 8                      ; <i8*> [#uses=1]
+  %5 = bitcast i8* %4 to void (i8*)*              ; <void (i8*)*> [#uses=1]
+  %6 = bitcast void ()* %1 to i8*                 ; <i8*> [#uses=1]
+  tail call void %5(i8* %6) nounwind
+  ret void
+}
+
+; rdar://7726868
+%struct.foo = type { [4 x i32] }
+
+define void @t15(%struct.foo* noalias sret %agg.result) nounwind  {
+; 32: t15:
+; 32: call {{_?}}f
+; 32: ret $4
+
+; 64: t15:
+; 64: callq {{_?}}f
+; 64: ret
+  tail call fastcc void @f(%struct.foo* noalias sret %agg.result) nounwind
+  ret void
+}
+
+declare void @f(%struct.foo* noalias sret) nounwind
+
+define void @t16() nounwind ssp {
+entry:
+; 32: t16:
+; 32: call {{_?}}bar4
+; 32: fstp
+
+; 64: t16:
+; 64: jmp {{_?}}bar4
+  %0 = tail call double @bar4() nounwind
+  ret void
+}
+
+declare double @bar4()
diff --git a/test/CodeGen/X86/sse2.ll b/test/CodeGen/X86/sse2.ll
index f2b8010..20b8eac 100644
--- a/test/CodeGen/X86/sse2.ll
+++ b/test/CodeGen/X86/sse2.ll
@@ -10,10 +10,10 @@ define void @t1(<2 x double>* %r, <2 x double>* %A, double %B) nounwind  {
         
 ; CHECK: t1:
 ; CHECK: 	movl	8(%esp), %eax
-; CHECK-NEXT: 	movl	4(%esp), %ecx
 ; CHECK-NEXT: 	movapd	(%eax), %xmm0
 ; CHECK-NEXT: 	movlpd	12(%esp), %xmm0
-; CHECK-NEXT: 	movapd	%xmm0, (%ecx)
+; CHECK-NEXT: 	movl	4(%esp), %eax
+; CHECK-NEXT: 	movapd	%xmm0, (%eax)
 ; CHECK-NEXT: 	ret
 }
 
@@ -26,9 +26,9 @@ define void @t2(<2 x double>* %r, <2 x double>* %A, double %B) nounwind  {
         
 ; CHECK: t2:
 ; CHECK: 	movl	8(%esp), %eax
-; CHECK-NEXT: 	movl	4(%esp), %ecx
 ; CHECK-NEXT: 	movapd	(%eax), %xmm0
 ; CHECK-NEXT: 	movhpd	12(%esp), %xmm0
-; CHECK-NEXT: 	movapd	%xmm0, (%ecx)
+; CHECK-NEXT: 	movl	4(%esp), %eax
+; CHECK-NEXT: 	movapd	%xmm0, (%eax)
 ; CHECK-NEXT: 	ret
 }
diff --git a/test/CodeGen/X86/sse3.ll b/test/CodeGen/X86/sse3.ll
index 921161e..e9c2c01 100644
--- a/test/CodeGen/X86/sse3.ll
+++ b/test/CodeGen/X86/sse3.ll
@@ -17,8 +17,8 @@ entry:
         
 ; X64: t0:
 ; X64: 	movddup	(%rsi), %xmm0
-; X64:	xorl	%eax, %eax
 ; X64:  pshuflw	$0, %xmm0, %xmm0
+; X64:	xorl	%eax, %eax
 ; X64:	pinsrw	$0, %eax, %xmm0
 ; X64:	movaps	%xmm0, (%rdi)
 ; X64:	ret
@@ -169,11 +169,11 @@ define internal void @t10() nounwind {
         ret void
 ; X64: 	t10:
 ; X64: 		pextrw	$4, %xmm0, %eax
-; X64: 		pextrw	$6, %xmm0, %edx
 ; X64: 		movlhps	%xmm1, %xmm1
 ; X64: 		pshuflw	$8, %xmm1, %xmm1
 ; X64: 		pinsrw	$2, %eax, %xmm1
-; X64: 		pinsrw	$3, %edx, %xmm1
+; X64: 		pextrw	$6, %xmm0, %eax
+; X64: 		pinsrw	$3, %eax, %xmm1
 }
 
 
@@ -184,8 +184,8 @@ entry:
 	ret <8 x i16> %tmp7
 
 ; X64: t11:
-; X64:	movlhps	%xmm0, %xmm0
 ; X64:	movd	%xmm1, %eax
+; X64:	movlhps	%xmm0, %xmm0
 ; X64:	pshuflw	$1, %xmm0, %xmm0
 ; X64:	pinsrw	$1, %eax, %xmm0
 ; X64:	ret
@@ -198,8 +198,8 @@ entry:
 	ret <8 x i16> %tmp9
 
 ; X64: t12:
-; X64: 	movlhps	%xmm0, %xmm0
 ; X64: 	pextrw	$3, %xmm1, %eax
+; X64: 	movlhps	%xmm0, %xmm0
 ; X64: 	pshufhw	$3, %xmm0, %xmm0
 ; X64: 	pinsrw	$5, %eax, %xmm0
 ; X64: 	ret
diff --git a/test/CodeGen/X86/sse42.ll b/test/CodeGen/X86/sse42.ll
index c9c4d01..1723909 100644
--- a/test/CodeGen/X86/sse42.ll
+++ b/test/CodeGen/X86/sse42.ll
@@ -9,10 +9,10 @@ define i32 @crc32_8(i32 %a, i8 %b) nounwind {
   %tmp = call i32 @llvm.x86.sse42.crc32.8(i32 %a, i8 %b)
   ret i32 %tmp
 ; X32: _crc32_8:
-; X32:     crc32   8(%esp), %eax
+; X32:     crc32b   8(%esp), %eax
 
 ; X64: _crc32_8:
-; X64:     crc32   %sil, %eax
+; X64:     crc32b   %sil, %eax
 }
 
 
@@ -20,10 +20,10 @@ define i32 @crc32_16(i32 %a, i16 %b) nounwind {
   %tmp = call i32 @llvm.x86.sse42.crc32.16(i32 %a, i16 %b)
   ret i32 %tmp
 ; X32: _crc32_16:
-; X32:     crc32   8(%esp), %eax
+; X32:     crc32w   8(%esp), %eax
 
 ; X64: _crc32_16:
-; X64:     crc32   %si, %eax
+; X64:     crc32w   %si, %eax
 }
 
 
@@ -31,8 +31,8 @@ define i32 @crc32_32(i32 %a, i32 %b) nounwind {
   %tmp = call i32 @llvm.x86.sse42.crc32.32(i32 %a, i32 %b)
   ret i32 %tmp
 ; X32: _crc32_32:
-; X32:     crc32   8(%esp), %eax
+; X32:     crc32l   8(%esp), %eax
 
 ; X64: _crc32_32:
-; X64:     crc32   %esi, %eax
+; X64:     crc32l   %esi, %eax
 }
diff --git a/test/CodeGen/X86/widen_load-2.ll b/test/CodeGen/X86/widen_load-2.ll
index 11383fa..58b557a 100644
--- a/test/CodeGen/X86/widen_load-2.ll
+++ b/test/CodeGen/X86/widen_load-2.ll
@@ -152,4 +152,28 @@ define void @add31i8(%i8vec31* nocapture sret %ret, %i8vec31* %ap, %i8vec31* %bp
 	%x = add %i8vec31 %a, %b
 	store %i8vec31 %x, %i8vec31* %ret, align 16
 	ret void
-}
-\ No newline at end of file
+}
+
+
+%i8vec3pack = type { <3 x i8>, i8 }
+define %i8vec3pack  @rot() nounwind {
+; CHECK: shrb
+entry:
+  %X = alloca %i8vec3pack, align 4
+  %rot = alloca %i8vec3pack, align 4
+  %result = alloca %i8vec3pack, align 4
+  %storetmp = bitcast %i8vec3pack* %X to <3 x i8>*
+  store <3 x i8> <i8 -98, i8 -98, i8 -98>, <3 x i8>* %storetmp
+  %storetmp1 = bitcast %i8vec3pack* %rot to <3 x i8>*
+  store <3 x i8> <i8 1, i8 1, i8 1>, <3 x i8>* %storetmp1
+  %tmp = load %i8vec3pack* %X
+  %extractVec = extractvalue %i8vec3pack %tmp, 0
+  %tmp2 = load %i8vec3pack* %rot
+  %extractVec3 = extractvalue %i8vec3pack %tmp2, 0
+  %shr = lshr <3 x i8> %extractVec, %extractVec3
+  %storetmp4 = bitcast %i8vec3pack* %result to <3 x i8>*
+  store <3 x i8> %shr, <3 x i8>* %storetmp4
+  %tmp5 = load %i8vec3pack* %result
+  ret %i8vec3pack %tmp5
+}
+
diff --git a/test/DebugInfo/2010-03-19-DbgDeclare.ll b/test/DebugInfo/2010-03-19-DbgDeclare.ll
new file mode 100644
index 0000000..1f7a889
--- /dev/null
+++ b/test/DebugInfo/2010-03-19-DbgDeclare.ll
@@ -0,0 +1,12 @@
+; RUN: llvm-as < %s | opt -verify -disable-output
+
+define void @Foo(i32 %a, i32 %b) {
+entry:
+  call void @llvm.dbg.declare(metadata !{i32* null}, metadata !1)
+  ret void
+}
+
+!0 = metadata !{i32 662302, i32 26, metadata !1, null}
+!1 = metadata !{i32 4, metadata !"foo"}
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
diff --git a/test/FrontendObjC/2010-03-17-StructRef.m b/test/FrontendObjC/2010-03-17-StructRef.m
new file mode 100644
index 0000000..a8a509c
--- /dev/null
+++ b/test/FrontendObjC/2010-03-17-StructRef.m
@@ -0,0 +1,43 @@
+// RUN: %llvmgcc %s -S -o - | FileCheck %s
+// Bitfield references must not touch memory outside of the enclosing
+// struct.   Radar 7639995
+typedef signed char BOOL;
+@protocol NSObject
+- (id)init;
+@end
+@interface NSObject <NSObject> {}
+@end
+@interface IMAVChatParticipant : NSObject {
+  int _ardRole;
+  int _state;
+  int _avRelayStatus;
+  int _chatEndedReason;
+  int _chatError;
+  unsigned _sendingAudio:1;
+  unsigned _sendingVideo:1;
+  unsigned _sendingAuxVideo:1;
+  unsigned _audioMuted:1;
+  unsigned _videoPaused:1;
+  unsigned _networkStalled:1;
+  unsigned _isInitiator:1;
+  unsigned _isAOLInterop:1;
+  unsigned _isRecording:1;
+  unsigned _isUsingICE:1;
+}
+@end
+@implementation IMAVChatParticipant
+- (id) init {
+  self = [super init];
+  if ( self ) {
+    BOOL blah = (BOOL)1;
+    // We're expecting these three bitfield assignments will generate i8 stores.
+    _sendingAudio = (BOOL)1;
+    _isUsingICE = (BOOL)1;
+    _isUsingICE = blah;
+    // CHECK: store i8
+    // CHECK: store i8
+    // CHECK: store i8
+  }
+  return self;
+}
+@end
diff --git a/test/LLVMC/AppendCmdHook.td b/test/LLVMC/AppendCmdHook.td
index 539a93f..254d5ea 100644
--- a/test/LLVMC/AppendCmdHook.td
+++ b/test/LLVMC/AppendCmdHook.td
@@ -2,6 +2,7 @@
 // RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t
 // RUN: FileCheck -input-file %t %s
 // RUN: %compile_cxx -fexceptions -x c++ %t
+// XFAIL: vg_leak
 
 include "llvm/CompilerDriver/Common.td"
 
diff --git a/test/LLVMC/C++/dash-x.cpp b/test/LLVMC/C++/dash-x.cpp
index b32400e..7d4cf19 100644
--- a/test/LLVMC/C++/dash-x.cpp
+++ b/test/LLVMC/C++/dash-x.cpp
@@ -1,6 +1,7 @@
 // Test that we can compile .c files as C++ and vice versa
 // RUN: llvmc %s -x c++ %p/../test_data/false.c -x c %p/../test_data/false.cpp -x lisp -x whatnot -x none %p/../test_data/false2.cpp -o %t
 // RUN: %abs_tmp | grep hello
+// XFAIL: vg
 
 extern int test_main();
 
diff --git a/test/LLVMC/C++/hello.cpp b/test/LLVMC/C++/hello.cpp
index b9c6399..8f38306 100644
--- a/test/LLVMC/C++/hello.cpp
+++ b/test/LLVMC/C++/hello.cpp
@@ -1,6 +1,7 @@
 // Test that we can compile C++ code.
 // RUN: llvmc %s -o %t
 // RUN: %abs_tmp | grep hello
+// XFAIL: vg
 #include <iostream>
 
 int main() {
diff --git a/test/LLVMC/C++/together.cpp b/test/LLVMC/C++/together.cpp
index e02f69a..925215a4 100644
--- a/test/LLVMC/C++/together.cpp
+++ b/test/LLVMC/C++/together.cpp
@@ -1,6 +1,7 @@
 // Check that we can compile files of different types together.
 // RUN: llvmc %s %p/../test_data/together.c -o %t
 // RUN: %abs_tmp | grep hello
+// XFAIL: vg
 
 extern "C" void test();
 
diff --git a/test/LLVMC/C/emit-llvm.c b/test/LLVMC/C/emit-llvm.c
index 38bbba6..9844bc7 100644
--- a/test/LLVMC/C/emit-llvm.c
+++ b/test/LLVMC/C/emit-llvm.c
@@ -1,4 +1,5 @@
 // RUN: llvmc -c -emit-llvm -o - %s | llvm-dis | grep "@f0()" | count 1
+// XFAIL: vg_leak
 
 int f0(void) {
 }
diff --git a/test/LLVMC/C/hello.c b/test/LLVMC/C/hello.c
index b2d903f..29ad39f 100644
--- a/test/LLVMC/C/hello.c
+++ b/test/LLVMC/C/hello.c
@@ -2,6 +2,7 @@
  * Check that we can compile helloworld
  * RUN: llvmc %s -o %t
  * RUN: %abs_tmp | grep hello
+ * XFAIL: vg_leak
  */
 
 #include <stdio.h>
diff --git a/test/LLVMC/C/include.c b/test/LLVMC/C/include.c
index 07ae761..9c9530b 100644
--- a/test/LLVMC/C/include.c
+++ b/test/LLVMC/C/include.c
@@ -2,6 +2,7 @@
  * Check that the 'include' options work.
  * RUN: echo "int x;\n" > %t1.inc
  * RUN: llvmc -include %t1.inc -fsyntax-only %s
+ * XFAIL: vg_leak
  */
 
 int f0(void) {
diff --git a/test/LLVMC/C/opt-test.c b/test/LLVMC/C/opt-test.c
index d69dc9b..7924def 100644
--- a/test/LLVMC/C/opt-test.c
+++ b/test/LLVMC/C/opt-test.c
@@ -2,6 +2,7 @@
  * Check that the -opt switch works.
  * RUN: llvmc %s -opt -o %t
  * RUN: %abs_tmp | grep hello
+ * XFAIL: vg_leak
  */
 
 #include <stdio.h>
diff --git a/test/LLVMC/C/sink.c b/test/LLVMC/C/sink.c
index bdff340..c4f9beb 100644
--- a/test/LLVMC/C/sink.c
+++ b/test/LLVMC/C/sink.c
@@ -2,6 +2,7 @@
  * Check that the 'sink' options work.
  * RUN: llvmc -v -Wall %s -o %t |& grep "Wall"
  * RUN: %abs_tmp | grep hello
+ * XFAIL: vg_leak
  */
 
 #include <stdio.h>
diff --git a/test/LLVMC/C/wall.c b/test/LLVMC/C/wall.c
index f676099..36813ba 100644
--- a/test/LLVMC/C/wall.c
+++ b/test/LLVMC/C/wall.c
@@ -2,6 +2,7 @@
  * Check that -Wall works as intended
  * RUN: llvmc -Wall %s -o %t
  * RUN: %abs_tmp | grep hello
+ * XFAIL: vg_leak
  */
 
 #include <stdio.h>
diff --git a/test/LLVMC/EmptyCompilationGraph.td b/test/LLVMC/EmptyCompilationGraph.td
index 934905b..e5d5e9a 100644
--- a/test/LLVMC/EmptyCompilationGraph.td
+++ b/test/LLVMC/EmptyCompilationGraph.td
@@ -1,6 +1,7 @@
 // Check that the compilation graph can be empty.
 // RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t
 // RUN: %compile_cxx -fexceptions -x c++ %t
+// XFAIL: vg_leak
 
 include "llvm/CompilerDriver/Common.td"
 
diff --git a/test/LLVMC/EnvParentheses.td b/test/LLVMC/EnvParentheses.td
index c563171..86091db 100644
--- a/test/LLVMC/EnvParentheses.td
+++ b/test/LLVMC/EnvParentheses.td
@@ -3,6 +3,7 @@
 // RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t
 // RUN: not grep {FOO")));} %t
 // RUN: %compile_cxx -fexceptions -x c++ %t
+// XFAIL: vg_leak
 
 include "llvm/CompilerDriver/Common.td"
 
diff --git a/test/LLVMC/ExternOptions.td b/test/LLVMC/ExternOptions.td
index 77cb4bf..d84ea84 100644
--- a/test/LLVMC/ExternOptions.td
+++ b/test/LLVMC/ExternOptions.td
@@ -3,6 +3,7 @@
 // RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t
 // RUN: FileCheck -input-file %t %s
 // RUN: %compile_cxx -fexceptions -x c++ %t
+// XFAIL: vg_leak
 
 include "llvm/CompilerDriver/Common.td"
 
diff --git a/test/LLVMC/ForwardAs.td b/test/LLVMC/ForwardAs.td
index 7c3bd17..536b96a 100644
--- a/test/LLVMC/ForwardAs.td
+++ b/test/LLVMC/ForwardAs.td
@@ -3,6 +3,7 @@
 // RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t
 // RUN: FileCheck -input-file %t %s
 // RUN: %compile_cxx -fexceptions -x c++ %t
+// XFAIL: vg_leak
 
 include "llvm/CompilerDriver/Common.td"
 
diff --git a/test/LLVMC/ForwardTransformedValue.td b/test/LLVMC/ForwardTransformedValue.td
index 2caef6c..5e0bf29 100644
--- a/test/LLVMC/ForwardTransformedValue.td
+++ b/test/LLVMC/ForwardTransformedValue.td
@@ -3,6 +3,7 @@
 // RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t
 // RUN: FileCheck -input-file %t %s
 // RUN: %compile_cxx -fexceptions -x c++ %t
+// XFAIL: vg_leak
 
 include "llvm/CompilerDriver/Common.td"
 
diff --git a/test/LLVMC/ForwardValue.td b/test/LLVMC/ForwardValue.td
index 463235c..4c7a0ee 100644
--- a/test/LLVMC/ForwardValue.td
+++ b/test/LLVMC/ForwardValue.td
@@ -3,6 +3,7 @@
 // RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t
 // RUN: FileCheck -input-file %t %s
 // RUN: %compile_cxx -fexceptions -x c++ %t
+// XFAIL: vg_leak
 
 include "llvm/CompilerDriver/Common.td"
 
diff --git a/test/LLVMC/HookWithArguments.td b/test/LLVMC/HookWithArguments.td
index 312fa9c..5ff96cd 100644
--- a/test/LLVMC/HookWithArguments.td
+++ b/test/LLVMC/HookWithArguments.td
@@ -2,6 +2,7 @@
 // RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t
 // RUN: FileCheck -input-file %t %s
 // RUN: %compile_cxx -fexceptions -x c++ %t
+// XFAIL: vg_leak
 
 include "llvm/CompilerDriver/Common.td"
 
diff --git a/test/LLVMC/HookWithInFile.td b/test/LLVMC/HookWithInFile.td
index f58e3f4..9855dbc 100644
--- a/test/LLVMC/HookWithInFile.td
+++ b/test/LLVMC/HookWithInFile.td
@@ -2,6 +2,7 @@
 // RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t
 // RUN: FileCheck -input-file %t %s
 // RUN: %compile_cxx -fexceptions -x c++ %t
+// XFAIL: vg_leak
 
 include "llvm/CompilerDriver/Common.td"
 
diff --git a/test/LLVMC/Init.td b/test/LLVMC/Init.td
index ff9a0d8..05209bf 100644
--- a/test/LLVMC/Init.td
+++ b/test/LLVMC/Init.td
@@ -2,6 +2,7 @@
 // RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t
 // RUN: FileCheck -input-file %t %s
 // RUN: %compile_cxx -fexceptions -x c++ %t
+// XFAIL: vg_leak
 
 include "llvm/CompilerDriver/Common.td"
 
diff --git a/test/LLVMC/MultiValuedOption.td b/test/LLVMC/MultiValuedOption.td
index b52af57..73ccb63 100644
--- a/test/LLVMC/MultiValuedOption.td
+++ b/test/LLVMC/MultiValuedOption.td
@@ -3,6 +3,7 @@
 // RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t
 // RUN: FileCheck -input-file %t %s
 // RUN: %compile_cxx -fexceptions -x c++ %t
+// XFAIL: vg_leak
 
 include "llvm/CompilerDriver/Common.td"
 
diff --git a/test/LLVMC/MultipleCompilationGraphs.td b/test/LLVMC/MultipleCompilationGraphs.td
index 9702248..86cd613 100644
--- a/test/LLVMC/MultipleCompilationGraphs.td
+++ b/test/LLVMC/MultipleCompilationGraphs.td
@@ -1,6 +1,7 @@
 // Check that multiple compilation graphs are allowed.
 // RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t
 // RUN: %compile_cxx -fexceptions -x c++ %t
+// XFAIL: vg_leak
 
 include "llvm/CompilerDriver/Common.td"
 
diff --git a/test/LLVMC/NoActions.td b/test/LLVMC/NoActions.td
index 015bfdd..a80bcfe 100644
--- a/test/LLVMC/NoActions.td
+++ b/test/LLVMC/NoActions.td
@@ -2,6 +2,7 @@
 // RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t
 // RUN: FileCheck -input-file %t %s
 // RUN: %compile_cxx -fexceptions -x c++ %t
+// XFAIL: vg_leak
 
 include "llvm/CompilerDriver/Common.td"
 
diff --git a/test/LLVMC/NoCompilationGraph.td b/test/LLVMC/NoCompilationGraph.td
index 96c1f17..69df701 100644
--- a/test/LLVMC/NoCompilationGraph.td
+++ b/test/LLVMC/NoCompilationGraph.td
@@ -1,5 +1,6 @@
 // Check that the compilation graph is not required.
 // RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t
 // RUN: %compile_cxx -fexceptions -x c++ %t
+// XFAIL: vg_leak
 
 include "llvm/CompilerDriver/Common.td"
diff --git a/test/LLVMC/OneOrMore.td b/test/LLVMC/OneOrMore.td
index 42ec693..37fbc87 100644
--- a/test/LLVMC/OneOrMore.td
+++ b/test/LLVMC/OneOrMore.td
@@ -3,6 +3,7 @@
 // RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t
 // RUN: FileCheck -input-file %t %s
 // RUN: %compile_cxx -fexceptions -x c++ %t
+// XFAIL: vg_leak
 
 include "llvm/CompilerDriver/Common.td"
 
diff --git a/test/LLVMC/OptionPreprocessor.td b/test/LLVMC/OptionPreprocessor.td
index 8a31481..c2641be 100644
--- a/test/LLVMC/OptionPreprocessor.td
+++ b/test/LLVMC/OptionPreprocessor.td
@@ -2,6 +2,7 @@
 // RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t
 // RUN: FileCheck -input-file %t %s
 // RUN: %compile_cxx -fexceptions -x c++ %t
+// XFAIL: vg_leak
 
 include "llvm/CompilerDriver/Common.td"
 
diff --git a/test/LLVMC/TestWarnings.td b/test/LLVMC/TestWarnings.td
index 9523e24..0388cb0 100644
--- a/test/LLVMC/TestWarnings.td
+++ b/test/LLVMC/TestWarnings.td
@@ -1,6 +1,7 @@
 // Check that warnings about unused options are really emitted.
 // This should fail because the output is printed on stderr.
-// RUN: ignore tblgen -I %p/../../include --gen-llvmc %s |& grep "option '-Wall' has no effect!"
+// RUN: tblgen -I %p/../../include --gen-llvmc %s |& grep "option '-Wall' has no effect!"
+// XFAIL: vg_leak
 
 include "llvm/CompilerDriver/Common.td"
 
diff --git a/test/MC/AsmParser/X86/x86_32-bit_cat.s b/test/MC/AsmParser/X86/x86_32-bit_cat.s
index 5429e8e..e910c65 100644
--- a/test/MC/AsmParser/X86/x86_32-bit_cat.s
+++ b/test/MC/AsmParser/X86/x86_32-bit_cat.s
@@ -7756,41 +7756,38 @@
 // CHECK: 	ptest 	%xmm5, %xmm5
         	ptest	%xmm5,%xmm5
 
-// CHECK: 	crc32 	3735928559(%ebx,%ecx,8), %ecx
-        	crc32	0xdeadbeef(%ebx,%ecx,8),%ecx
+// CHECK: 	crc32b 	%bl, %eax
+                crc32b %bl, %eax
 
-// CHECK: 	crc32 	69, %ecx
-        	crc32	0x45,%ecx
+// CHECK: 	crc32b 	4(%ebx), %eax
+                crc32b 4(%ebx), %eax
 
-// CHECK: 	crc32 	32493, %ecx
-        	crc32	0x7eed,%ecx
+// CHECK: 	crc32w 	%bx, %eax
+                crc32w %bx, %eax
 
-// CHECK: 	crc32 	3133065982, %ecx
-        	crc32	0xbabecafe,%ecx
+// CHECK: 	crc32w 	4(%ebx), %eax
+                crc32w 4(%ebx), %eax
 
-// CHECK: 	crc32 	305419896, %ecx
-        	crc32	0x12345678,%ecx
+// CHECK: 	crc32l 	%ebx, %eax
+                crc32l %ebx, %eax
 
-// CHECK: 	crc32 	%ecx, %ecx
-        	crc32	%ecx,%ecx
+// CHECK: 	crc32l 	4(%ebx), %eax
+                crc32l 4(%ebx), %eax
 
-// CHECK: 	crc32 	%ecx, %ecx
-        	crc32	%ecx,%ecx
+// CHECK: 	crc32l 	3735928559(%ebx,%ecx,8), %ecx
+                crc32l 0xdeadbeef(%ebx,%ecx,8),%ecx
 
-// CHECK: 	crc32 	3735928559(%ebx,%ecx,8), %ecx
-        	crc32	0xdeadbeef(%ebx,%ecx,8),%ecx
+// CHECK: 	crc32l 	69, %ecx
+                crc32l 0x45,%ecx
 
-// CHECK: 	crc32 	69, %ecx
-        	crc32	0x45,%ecx
+// CHECK: 	crc32l 	32493, %ecx
+                crc32l 0x7eed,%ecx
 
-// CHECK: 	crc32 	32493, %ecx
-        	crc32	0x7eed,%ecx
+// CHECK: 	crc32l 	3133065982, %ecx
+                crc32l 0xbabecafe,%ecx
 
-// CHECK: 	crc32 	3133065982, %ecx
-        	crc32	0xbabecafe,%ecx
-
-// CHECK: 	crc32 	305419896, %ecx
-        	crc32	0x12345678,%ecx
+// CHECK: 	crc32l 	%ecx, %ecx
+                crc32l %ecx,%ecx
 
 // CHECK: 	pcmpgtq	3735928559(%ebx,%ecx,8), %xmm5
         	pcmpgtq	0xdeadbeef(%ebx,%ecx,8),%xmm5
diff --git a/test/MC/AsmParser/X86/x86_32-encoding.s b/test/MC/AsmParser/X86/x86_32-encoding.s
index 7dacc75..2088aa7b 100644
--- a/test/MC/AsmParser/X86/x86_32-encoding.s
+++ b/test/MC/AsmParser/X86/x86_32-encoding.s
@@ -9861,3 +9861,47 @@
 // CHECK: pcmpgtq	%xmm5, %xmm5
 // CHECK:  encoding: [0x66,0x0f,0x38,0x37,0xed]
         	pcmpgtq	%xmm5,%xmm5
+
+// CHECK: crc32b 	%bl, %eax
+// CHECK:  encoding: [0xf2,0x0f,0x38,0xf0,0xc3]
+                crc32b %bl, %eax
+
+// CHECK: crc32b 	4(%ebx), %eax
+// CHECK:  encoding: [0xf2,0x0f,0x38,0xf0,0x43,0x04]
+                crc32b 4(%ebx), %eax
+
+// CHECK: crc32w 	%bx, %eax
+// CHECK:  encoding: [0x66,0xf2,0x0f,0x38,0xf1,0xc3]
+                crc32w %bx, %eax
+
+// CHECK: crc32w 	4(%ebx), %eax
+// CHECK:  encoding: [0x66,0xf2,0x0f,0x38,0xf1,0x43,0x04]
+                crc32w 4(%ebx), %eax
+
+// CHECK: crc32l 	%ebx, %eax
+// CHECK:  encoding: [0xf2,0x0f,0x38,0xf1,0xc3]
+                crc32l %ebx, %eax
+
+// CHECK: crc32l 	4(%ebx), %eax
+// CHECK:  encoding: [0xf2,0x0f,0x38,0xf1,0x43,0x04]
+                crc32l 4(%ebx), %eax
+
+// CHECK: crc32l 	3735928559(%ebx,%ecx,8), %ecx
+// CHECK:  encoding: [0xf2,0x0f,0x38,0xf1,0x8c,0xcb,0xef,0xbe,0xad,0xde]
+                crc32l 0xdeadbeef(%ebx,%ecx,8),%ecx
+
+// CHECK: crc32l 	69, %ecx
+// CHECK:  encoding: [0xf2,0x0f,0x38,0xf1,0x0d,0x45,0x00,0x00,0x00]
+                crc32l 0x45,%ecx
+
+// CHECK: crc32l 	32493, %ecx
+// CHECK:  encoding: [0xf2,0x0f,0x38,0xf1,0x0d,0xed,0x7e,0x00,0x00]
+                crc32l 0x7eed,%ecx
+
+// CHECK: crc32l 	3133065982, %ecx
+// CHECK:  encoding: [0xf2,0x0f,0x38,0xf1,0x0d,0xfe,0xca,0xbe,0xba]
+                crc32l 0xbabecafe,%ecx
+
+// CHECK: crc32l 	%ecx, %ecx
+// CHECK:  encoding: [0xf2,0x0f,0x38,0xf1,0xc9]
+                crc32l %ecx,%ecx
diff --git a/test/MC/AsmParser/X86/x86_64-encoding.s b/test/MC/AsmParser/X86/x86_64-encoding.s
new file mode 100644
index 0000000..3920c5b
--- /dev/null
+++ b/test/MC/AsmParser/X86/x86_64-encoding.s
@@ -0,0 +1,73 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s
+
+// CHECK: crc32b 	%bl, %eax
+// CHECK:  encoding: [0xf2,0x0f,0x38,0xf0,0xc3]
+        crc32b	%bl, %eax
+
+// CHECK: crc32b 	4(%rbx), %eax
+// CHECK:  encoding: [0xf2,0x0f,0x38,0xf0,0x43,0x04]
+        crc32b	4(%rbx), %eax
+
+// CHECK: crc32w 	%bx, %eax
+// CHECK:  encoding: [0x66,0xf2,0x0f,0x38,0xf1,0xc3]
+        crc32w	%bx, %eax
+
+// CHECK: crc32w 	4(%rbx), %eax
+// CHECK:  encoding: [0x66,0xf2,0x0f,0x38,0xf1,0x43,0x04]
+        crc32w	4(%rbx), %eax
+
+// CHECK: crc32l 	%ebx, %eax
+// CHECK:  encoding: [0xf2,0x0f,0x38,0xf1,0xc3]
+        crc32l	%ebx, %eax
+
+// CHECK: crc32l 	4(%rbx), %eax
+// CHECK:  encoding: [0xf2,0x0f,0x38,0xf1,0x43,0x04]
+        crc32l	4(%rbx), %eax
+
+// CHECK: crc32l 	3735928559(%rbx,%rcx,8), %ecx
+// CHECK:  encoding: [0xf2,0x0f,0x38,0xf1,0x8c,0xcb,0xef,0xbe,0xad,0xde]
+        	crc32l   0xdeadbeef(%rbx,%rcx,8),%ecx
+
+// CHECK: crc32l 	69, %ecx
+// CHECK:  encoding: [0xf2,0x0f,0x38,0xf1,0x0c,0x25,0x45,0x00,0x00,0x00]
+        	crc32l   0x45,%ecx
+
+// CHECK: crc32l 	32493, %ecx
+// CHECK:  encoding: [0xf2,0x0f,0x38,0xf1,0x0c,0x25,0xed,0x7e,0x00,0x00]
+        	crc32l   0x7eed,%ecx
+
+// CHECK: crc32l 	3133065982, %ecx
+// CHECK:  encoding: [0xf2,0x0f,0x38,0xf1,0x0c,0x25,0xfe,0xca,0xbe,0xba]
+        	crc32l   0xbabecafe,%ecx
+
+// CHECK: crc32l 	%ecx, %ecx
+// CHECK:  encoding: [0xf2,0x0f,0x38,0xf1,0xc9]
+        	crc32l   %ecx,%ecx
+
+// CHECK: crc32b 	%r11b, %eax
+// CHECK:  encoding: [0xf2,0x41,0x0f,0x38,0xf0,0xc3]
+        crc32b	%r11b, %eax
+
+// CHECK: crc32b 	4(%rbx), %eax
+// CHECK:  encoding: [0xf2,0x0f,0x38,0xf0,0x43,0x04]
+        crc32b	4(%rbx), %eax
+
+// CHECK: crc32b 	%dil, %rax
+// CHECK:  encoding: [0xf2,0x48,0x0f,0x38,0xf0,0xc7]
+        crc32b	%dil,%rax
+
+// CHECK: crc32b 	%r11b, %rax
+// CHECK:  encoding: [0xf2,0x49,0x0f,0x38,0xf0,0xc3]
+        crc32b	%r11b,%rax
+
+// CHECK: crc32b 	4(%rbx), %rax
+// CHECK:  encoding: [0xf2,0x48,0x0f,0x38,0xf0,0x43,0x04]
+        crc32b	4(%rbx), %rax
+
+// CHECK: crc32q 	%rbx, %rax
+// CHECK:  encoding: [0xf2,0x48,0x0f,0x38,0xf1,0xc3]
+        crc32q	%rbx, %rax
+
+// CHECK: crc32q 	4(%rbx), %rax
+// CHECK:  encoding: [0xf2,0x48,0x0f,0x38,0xf1,0x43,0x04]
+        crc32q	4(%rbx), %rax
diff --git a/test/MC/AsmParser/X86/x86_64-incl_decl.s b/test/MC/AsmParser/X86/x86_64-incl_decl.s
new file mode 100644
index 0000000..51315f8
--- /dev/null
+++ b/test/MC/AsmParser/X86/x86_64-incl_decl.s
@@ -0,0 +1,26 @@
+// RUN: llvm-mc -triple i386-unknown-unknown --show-encoding %s | FileCheck --check-prefix=CHECK-X86_32 %s
+// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck --check-prefix=CHECK-X86_64 %s
+
+# CHECK-X86_32:	incb	%al # encoding: [0xfe,0xc0]
+# CHECK-X86_64:	incb	%al # encoding: [0xfe,0xc0]
+	incb %al
+
+# CHECK-X86_32:	incw	%ax # encoding: [0x66,0x40]
+# CHECK-X86_64:	incw	%ax # encoding: [0x66,0xff,0xc0]
+	incw %ax
+
+# CHECK-X86_32:	incl	%eax # encoding: [0x40]
+# CHECK-X86_64:	incl	%eax # encoding: [0xff,0xc0]
+	incl %eax
+
+# CHECK-X86_32:	decb	%al # encoding: [0xfe,0xc8]
+# CHECK-X86_64:	decb	%al # encoding: [0xfe,0xc8]
+	decb %al
+
+# CHECK-X86_32:	decw	%ax # encoding: [0x66,0x48]
+# CHECK-X86_64:	decw	%ax # encoding: [0x66,0xff,0xc8]
+	decw %ax
+
+# CHECK-X86_32:	decl	%eax # encoding: [0x48]
+# CHECK-X86_64:	decl	%eax # encoding: [0xff,0xc8]
+	decl %eax
diff --git a/test/MC/AsmParser/X86/x86_64-new-encoder.s b/test/MC/AsmParser/X86/x86_64-new-encoder.s
index 797558a..4028bee 100644
--- a/test/MC/AsmParser/X86/x86_64-new-encoder.s
+++ b/test/MC/AsmParser/X86/x86_64-new-encoder.s
@@ -25,5 +25,30 @@ movq	$12, foo(%rip)
 // CHECK: encoding: [0x48,0xc7,0x05,A,A,A,A,0x0c,0x00,0x00,0x00]
 // CHECK:    fixup A - offset: 3, value: foo-8, kind: reloc_riprel_4byte
 
-// CHECK: addq	$-424, %rax             # encoding: [0x48,0x05,0x58,0xfe,0xff,0xff]
+// CHECK: addq	$-424, %rax
+// CHECK: encoding: [0x48,0x05,0x58,0xfe,0xff,0xff]
 addq $-424, %rax
+
+
+// CHECK: movq	_foo@GOTPCREL(%rip), %rax
+// CHECK:  encoding: [0x48,0x8b,0x05,A,A,A,A]
+// CHECK:  fixup A - offset: 3, value: _foo@GOTPCREL-4, kind: reloc_riprel_4byte_movq_load
+movq _foo@GOTPCREL(%rip), %rax
+
+// CHECK: movq	_foo@GOTPCREL(%rip), %r14
+// CHECK:  encoding: [0x4c,0x8b,0x35,A,A,A,A]
+// CHECK:  fixup A - offset: 3, value: _foo@GOTPCREL-4, kind: reloc_riprel_4byte_movq_load
+movq _foo@GOTPCREL(%rip), %r14
+
+
+// CHECK: movq	(%r13,%rax,8), %r13
+// CHECK:  encoding: [0x4d,0x8b,0x6c,0xc5,0x00]
+movq 0x00(%r13,%rax,8),%r13
+
+// CHECK: testq	%rax, %rbx
+// CHECK:  encoding: [0x48,0x85,0xd8]
+testq %rax, %rbx
+
+// CHECK: cmpq	%rbx, %r14
+// CHECK:   encoding: [0x49,0x39,0xde]
+        cmpq %rbx, %r14
diff --git a/test/MC/MachO/Darwin/dg.exp b/test/MC/MachO/Darwin/dg.exp
deleted file mode 100644
index 0f34b63..0000000
--- a/test/MC/MachO/Darwin/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_darwin_and_target X86] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{s}]]
-}
diff --git a/test/MC/MachO/Darwin/x86_32_diff_as.s b/test/MC/MachO/Darwin/x86_32_diff_as.s
deleted file mode 100644
index 7fe75aa..0000000
--- a/test/MC/MachO/Darwin/x86_32_diff_as.s
+++ /dev/null
@@ -1,551 +0,0 @@
-// Validate that we can assemble this file exactly like the platform
-// assembler.
-//
-// XFAIL: *
-// RUN: llvm-mc -filetype=obj -triple i386-unknown-unknown -o %t.mc.o %s
-// RUN: as -arch i386 -o %t.as.o %s
-// RUN: diff %t.mc.o %t.as.o
-
-        	movb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
-        	movw	$0x7ace,0xdeadbeef(%ebx,%ecx,8)
-        	movl	$0x7afebabe,0xdeadbeef(%ebx,%ecx,8)
-        	movl	$0x13572468,0xdeadbeef(%ebx,%ecx,8)
-        	movsbl	0xdeadbeef(%ebx,%ecx,8),%ecx
-        	movswl	0xdeadbeef(%ebx,%ecx,8),%ecx
-        	movzbl	0xdeadbeef(%ebx,%ecx,8),%ecx
-        	movzwl	0xdeadbeef(%ebx,%ecx,8),%ecx
-        	pushl	0xdeadbeef(%ebx,%ecx,8)
-        	popl	0xdeadbeef(%ebx,%ecx,8)
-        	lahf
-        	sahf
-        	addb	$0xfe,0xdeadbeef(%ebx,%ecx,8)
-        	addb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
-        	addw	$0x7ace,0xdeadbeef(%ebx,%ecx,8)
-        	addl	$0x7afebabe,0xdeadbeef(%ebx,%ecx,8)
-        	addl	$0x13572468,0xdeadbeef(%ebx,%ecx,8)
-        	incl	0xdeadbeef(%ebx,%ecx,8)
-        	subb	$0xfe,0xdeadbeef(%ebx,%ecx,8)
-        	subb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
-        	subw	$0x7ace,0xdeadbeef(%ebx,%ecx,8)
-        	subl	$0x7afebabe,0xdeadbeef(%ebx,%ecx,8)
-        	subl	$0x13572468,0xdeadbeef(%ebx,%ecx,8)
-        	decl	0xdeadbeef(%ebx,%ecx,8)
-        	sbbw	$0x7ace,0xdeadbeef(%ebx,%ecx,8)
-        	sbbl	$0x7afebabe,0xdeadbeef(%ebx,%ecx,8)
-        	sbbl	$0x13572468,0xdeadbeef(%ebx,%ecx,8)
-        	cmpb	$0xfe,0xdeadbeef(%ebx,%ecx,8)
-        	cmpb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
-        	cmpw	$0x7ace,0xdeadbeef(%ebx,%ecx,8)
-        	cmpl	$0x7afebabe,0xdeadbeef(%ebx,%ecx,8)
-        	cmpl	$0x13572468,0xdeadbeef(%ebx,%ecx,8)
-        	testb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
-        	testw	$0x7ace,0xdeadbeef(%ebx,%ecx,8)
-        	testl	$0x7afebabe,0xdeadbeef(%ebx,%ecx,8)
-        	testl	$0x13572468,0xdeadbeef(%ebx,%ecx,8)
-        	andb	$0xfe,0xdeadbeef(%ebx,%ecx,8)
-        	andb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
-        	andw	$0x7ace,0xdeadbeef(%ebx,%ecx,8)
-        	andl	$0x7afebabe,0xdeadbeef(%ebx,%ecx,8)
-        	andl	$0x13572468,0xdeadbeef(%ebx,%ecx,8)
-        	orb	$0xfe,0xdeadbeef(%ebx,%ecx,8)
-        	orb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
-        	orw	$0x7ace,0xdeadbeef(%ebx,%ecx,8)
-        	orl	$0x7afebabe,0xdeadbeef(%ebx,%ecx,8)
-        	orl	$0x13572468,0xdeadbeef(%ebx,%ecx,8)
-        	xorb	$0xfe,0xdeadbeef(%ebx,%ecx,8)
-        	xorb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
-        	xorw	$0x7ace,0xdeadbeef(%ebx,%ecx,8)
-        	xorl	$0x7afebabe,0xdeadbeef(%ebx,%ecx,8)
-        	xorl	$0x13572468,0xdeadbeef(%ebx,%ecx,8)
-        	adcb	$0xfe,0xdeadbeef(%ebx,%ecx,8)
-        	adcb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
-        	adcw	$0x7ace,0xdeadbeef(%ebx,%ecx,8)
-        	adcl	$0x7afebabe,0xdeadbeef(%ebx,%ecx,8)
-        	adcl	$0x13572468,0xdeadbeef(%ebx,%ecx,8)
-        	negl	0xdeadbeef(%ebx,%ecx,8)
-        	notl	0xdeadbeef(%ebx,%ecx,8)
-        	cbtw
-        	cwtl
-        	cwtd
-        	cltd
-        	mull	0xdeadbeef(%ebx,%ecx,8)
-        	imull	0xdeadbeef(%ebx,%ecx,8)
-        	divl	0xdeadbeef(%ebx,%ecx,8)
-        	idivl	0xdeadbeef(%ebx,%ecx,8)
-        	roll	$0,0xdeadbeef(%ebx,%ecx,8)
-        	rolb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
-        	roll	0xdeadbeef(%ebx,%ecx,8)
-        	rorl	$0,0xdeadbeef(%ebx,%ecx,8)
-        	rorb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
-        	rorl	0xdeadbeef(%ebx,%ecx,8)
-        	shll	$0,0xdeadbeef(%ebx,%ecx,8)
-        	shlb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
-        	shll	0xdeadbeef(%ebx,%ecx,8)
-        	shrl	$0,0xdeadbeef(%ebx,%ecx,8)
-        	shrb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
-        	shrl	0xdeadbeef(%ebx,%ecx,8)
-        	sarl	$0,0xdeadbeef(%ebx,%ecx,8)
-        	sarb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
-        	sarl	0xdeadbeef(%ebx,%ecx,8)
-        	call	*%ecx
-        	call	*0xdeadbeef(%ebx,%ecx,8)
-        	call	*0xdeadbeef(%ebx,%ecx,8)
-        	jmp	*0xdeadbeef(%ebx,%ecx,8)
-        	jmp	*0xdeadbeef(%ebx,%ecx,8)
-        	ljmpl	*0xdeadbeef(%ebx,%ecx,8)
-        	lret
-        	leave
-        	seto	%bl
-        	seto	0xdeadbeef(%ebx,%ecx,8)
-        	setno	%bl
-        	setno	0xdeadbeef(%ebx,%ecx,8)
-        	setb	%bl
-        	setb	0xdeadbeef(%ebx,%ecx,8)
-        	setae	%bl
-        	setae	0xdeadbeef(%ebx,%ecx,8)
-        	sete	%bl
-        	sete	0xdeadbeef(%ebx,%ecx,8)
-        	setne	%bl
-        	setne	0xdeadbeef(%ebx,%ecx,8)
-        	setbe	%bl
-        	setbe	0xdeadbeef(%ebx,%ecx,8)
-        	seta	%bl
-        	seta	0xdeadbeef(%ebx,%ecx,8)
-        	sets	%bl
-        	sets	0xdeadbeef(%ebx,%ecx,8)
-        	setns	%bl
-        	setns	0xdeadbeef(%ebx,%ecx,8)
-        	setp	%bl
-        	setp	0xdeadbeef(%ebx,%ecx,8)
-        	setnp	%bl
-        	setnp	0xdeadbeef(%ebx,%ecx,8)
-        	setl	%bl
-        	setl	0xdeadbeef(%ebx,%ecx,8)
-        	setge	%bl
-        	setge	0xdeadbeef(%ebx,%ecx,8)
-        	setle	%bl
-        	setle	0xdeadbeef(%ebx,%ecx,8)
-        	setg	%bl
-        	setg	0xdeadbeef(%ebx,%ecx,8)
-        	nopl	0xdeadbeef(%ebx,%ecx,8)
-        	nop
-        	fldl	0xdeadbeef(%ebx,%ecx,8)
-        	fildl	0xdeadbeef(%ebx,%ecx,8)
-        	fildll	0xdeadbeef(%ebx,%ecx,8)
-        	fldt	0xdeadbeef(%ebx,%ecx,8)
-        	fbld	0xdeadbeef(%ebx,%ecx,8)
-        	fstl	0xdeadbeef(%ebx,%ecx,8)
-        	fistl	0xdeadbeef(%ebx,%ecx,8)
-        	fstpl	0xdeadbeef(%ebx,%ecx,8)
-        	fistpl	0xdeadbeef(%ebx,%ecx,8)
-        	fistpll	0xdeadbeef(%ebx,%ecx,8)
-        	fstpt	0xdeadbeef(%ebx,%ecx,8)
-        	fbstp	0xdeadbeef(%ebx,%ecx,8)
-        	ficoml	0xdeadbeef(%ebx,%ecx,8)
-        	ficompl	0xdeadbeef(%ebx,%ecx,8)
-        	fucompp
-        	ftst
-        	fld1
-        	fldz
-        	faddl	0xdeadbeef(%ebx,%ecx,8)
-        	fiaddl	0xdeadbeef(%ebx,%ecx,8)
-        	fsubl	0xdeadbeef(%ebx,%ecx,8)
-        	fisubl	0xdeadbeef(%ebx,%ecx,8)
-        	fsubrl	0xdeadbeef(%ebx,%ecx,8)
-        	fisubrl	0xdeadbeef(%ebx,%ecx,8)
-        	fmull	0xdeadbeef(%ebx,%ecx,8)
-        	fimull	0xdeadbeef(%ebx,%ecx,8)
-        	fdivl	0xdeadbeef(%ebx,%ecx,8)
-        	fidivl	0xdeadbeef(%ebx,%ecx,8)
-        	fdivrl	0xdeadbeef(%ebx,%ecx,8)
-        	fidivrl	0xdeadbeef(%ebx,%ecx,8)
-        	fsqrt
-        	fsin
-        	fcos
-        	fchs
-        	fabs
-        	fldcw	0xdeadbeef(%ebx,%ecx,8)
-        	fnstcw	0xdeadbeef(%ebx,%ecx,8)
-        	rdtsc
-        	sysenter
-        	sysexit
-        	ud2
-        	movnti	%ecx,0xdeadbeef(%ebx,%ecx,8)
-        	clflush	0xdeadbeef(%ebx,%ecx,8)
-        	emms
-        	movd	%ecx,%mm3
-        	movd	0xdeadbeef(%ebx,%ecx,8),%mm3
-        	movd	%ecx,%xmm5
-        	movd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-        	movd	%xmm5,%ecx
-        	movd	%xmm5,0xdeadbeef(%ebx,%ecx,8)
-        	movq	0xdeadbeef(%ebx,%ecx,8),%mm3
-        	movq	%mm3,%mm3
-        	movq	%mm3,%mm3
-        	movq	%xmm5,%xmm5
-        	movq	%xmm5,%xmm5
-        	packssdw	%mm3,%mm3
-        	packssdw	%xmm5,%xmm5
-        	packsswb	%mm3,%mm3
-        	packsswb	%xmm5,%xmm5
-        	packuswb	%mm3,%mm3
-        	packuswb	%xmm5,%xmm5
-        	paddb	%mm3,%mm3
-        	paddb	%xmm5,%xmm5
-        	paddw	%mm3,%mm3
-        	paddw	%xmm5,%xmm5
-        	paddd	%mm3,%mm3
-        	paddd	%xmm5,%xmm5
-        	paddq	%mm3,%mm3
-        	paddq	%xmm5,%xmm5
-        	paddsb	%mm3,%mm3
-        	paddsb	%xmm5,%xmm5
-        	paddsw	%mm3,%mm3
-        	paddsw	%xmm5,%xmm5
-        	paddusb	%mm3,%mm3
-        	paddusb	%xmm5,%xmm5
-        	paddusw	%mm3,%mm3
-        	paddusw	%xmm5,%xmm5
-        	pand	%mm3,%mm3
-        	pand	%xmm5,%xmm5
-        	pandn	%mm3,%mm3
-        	pandn	%xmm5,%xmm5
-        	pcmpeqb	%mm3,%mm3
-        	pcmpeqb	%xmm5,%xmm5
-        	pcmpeqw	%mm3,%mm3
-        	pcmpeqw	%xmm5,%xmm5
-        	pcmpeqd	%mm3,%mm3
-        	pcmpeqd	%xmm5,%xmm5
-        	pcmpgtb	%mm3,%mm3
-        	pcmpgtb	%xmm5,%xmm5
-        	pcmpgtw	%mm3,%mm3
-        	pcmpgtw	%xmm5,%xmm5
-        	pcmpgtd	%mm3,%mm3
-        	pcmpgtd	%xmm5,%xmm5
-        	pmaddwd	%mm3,%mm3
-        	pmaddwd	%xmm5,%xmm5
-        	pmulhw	%mm3,%mm3
-        	pmulhw	%xmm5,%xmm5
-        	pmullw	%mm3,%mm3
-        	pmullw	%xmm5,%xmm5
-        	por	%mm3,%mm3
-        	por	%xmm5,%xmm5
-        	psllw	%mm3,%mm3
-        	psllw	%xmm5,%xmm5
-        	psllw	$0x7f,%mm3
-        	psllw	$0x7f,%xmm5
-        	pslld	%mm3,%mm3
-        	pslld	%xmm5,%xmm5
-        	pslld	$0x7f,%mm3
-        	pslld	$0x7f,%xmm5
-        	psllq	%mm3,%mm3
-        	psllq	%xmm5,%xmm5
-        	psllq	$0x7f,%mm3
-        	psllq	$0x7f,%xmm5
-        	psraw	%mm3,%mm3
-        	psraw	%xmm5,%xmm5
-        	psraw	$0x7f,%mm3
-        	psraw	$0x7f,%xmm5
-        	psrad	%mm3,%mm3
-        	psrad	%xmm5,%xmm5
-        	psrad	$0x7f,%mm3
-        	psrad	$0x7f,%xmm5
-        	psrlw	%mm3,%mm3
-        	psrlw	%xmm5,%xmm5
-        	psrlw	$0x7f,%mm3
-        	psrlw	$0x7f,%xmm5
-        	psrld	%mm3,%mm3
-        	psrld	%xmm5,%xmm5
-        	psrld	$0x7f,%mm3
-        	psrld	$0x7f,%xmm5
-        	psrlq	%mm3,%mm3
-        	psrlq	%xmm5,%xmm5
-        	psrlq	$0x7f,%mm3
-        	psrlq	$0x7f,%xmm5
-        	psubb	%mm3,%mm3
-        	psubb	%xmm5,%xmm5
-        	psubw	%mm3,%mm3
-        	psubw	%xmm5,%xmm5
-        	psubd	%mm3,%mm3
-        	psubd	%xmm5,%xmm5
-        	psubq	%mm3,%mm3
-        	psubq	%xmm5,%xmm5
-        	psubsb	%mm3,%mm3
-        	psubsb	%xmm5,%xmm5
-        	psubsw	%mm3,%mm3
-        	psubsw	%xmm5,%xmm5
-        	psubusb	%mm3,%mm3
-        	psubusb	%xmm5,%xmm5
-        	psubusw	%mm3,%mm3
-        	psubusw	%xmm5,%xmm5
-        	punpckhbw	%mm3,%mm3
-        	punpckhbw	%xmm5,%xmm5
-        	punpckhwd	%mm3,%mm3
-        	punpckhwd	%xmm5,%xmm5
-        	punpckhdq	%mm3,%mm3
-        	punpckhdq	%xmm5,%xmm5
-        	punpcklbw	%mm3,%mm3
-        	punpcklbw	%xmm5,%xmm5
-        	punpcklwd	%mm3,%mm3
-        	punpcklwd	%xmm5,%xmm5
-        	punpckldq	%mm3,%mm3
-        	punpckldq	%xmm5,%xmm5
-        	pxor	%mm3,%mm3
-        	pxor	%xmm5,%xmm5
-        	addps	%xmm5,%xmm5
-        	addss	%xmm5,%xmm5
-        	andnps	%xmm5,%xmm5
-        	andps	%xmm5,%xmm5
-        	cvtpi2ps	0xdeadbeef(%ebx,%ecx,8),%xmm5
-        	cvtpi2ps	%mm3,%xmm5
-        	cvtps2pi	0xdeadbeef(%ebx,%ecx,8),%mm3
-        	cvtps2pi	%xmm5,%mm3
-        	cvtsi2ss	%ecx,%xmm5
-        	cvtsi2ss	0xdeadbeef(%ebx,%ecx,8),%xmm5
-        	cvttps2pi	0xdeadbeef(%ebx,%ecx,8),%mm3
-        	cvttps2pi	%xmm5,%mm3
-        	cvttss2si	0xdeadbeef(%ebx,%ecx,8),%ecx
-        	cvttss2si	%xmm5,%ecx
-        	divps	%xmm5,%xmm5
-        	divss	%xmm5,%xmm5
-        	ldmxcsr	0xdeadbeef(%ebx,%ecx,8)
-        	maskmovq	%mm3,%mm3
-        	maxps	%xmm5,%xmm5
-        	maxss	%xmm5,%xmm5
-        	minps	%xmm5,%xmm5
-        	minss	%xmm5,%xmm5
-        	movaps	0xdeadbeef(%ebx,%ecx,8),%xmm5
-        	movaps	%xmm5,%xmm5
-        	movaps	%xmm5,0xdeadbeef(%ebx,%ecx,8)
-        	movaps	%xmm5,%xmm5
-        	movhlps	%xmm5,%xmm5
-        	movhps	%xmm5,0xdeadbeef(%ebx,%ecx,8)
-        	movlhps	%xmm5,%xmm5
-        	movlps	%xmm5,0xdeadbeef(%ebx,%ecx,8)
-        	movmskps	%xmm5,%ecx
-        	movntps	%xmm5,0xdeadbeef(%ebx,%ecx,8)
-        	movntq	%mm3,0xdeadbeef(%ebx,%ecx,8)
-        	movntdq	%xmm5,0xdeadbeef(%ebx,%ecx,8)
-        	movss	0xdeadbeef(%ebx,%ecx,8),%xmm5
-        	movss	%xmm5,%xmm5
-        	movss	%xmm5,0xdeadbeef(%ebx,%ecx,8)
-        	movss	%xmm5,%xmm5
-        	movups	0xdeadbeef(%ebx,%ecx,8),%xmm5
-        	movups	%xmm5,%xmm5
-        	movups	%xmm5,0xdeadbeef(%ebx,%ecx,8)
-        	movups	%xmm5,%xmm5
-        	mulps	%xmm5,%xmm5
-        	mulss	%xmm5,%xmm5
-        	orps	%xmm5,%xmm5
-        	pavgb	%mm3,%mm3
-        	pavgb	%xmm5,%xmm5
-        	pavgw	%mm3,%mm3
-        	pavgw	%xmm5,%xmm5
-        	pmaxsw	%mm3,%mm3
-        	pmaxsw	%xmm5,%xmm5
-        	pmaxub	%mm3,%mm3
-        	pmaxub	%xmm5,%xmm5
-        	pminsw	%mm3,%mm3
-        	pminsw	%xmm5,%xmm5
-        	pminub	%mm3,%mm3
-        	pminub	%xmm5,%xmm5
-        	pmovmskb	%mm3,%ecx
-        	pmovmskb	%xmm5,%ecx
-        	pmulhuw	%mm3,%mm3
-        	pmulhuw	%xmm5,%xmm5
-        	prefetchnta	0xdeadbeef(%ebx,%ecx,8)
-        	prefetcht0	0xdeadbeef(%ebx,%ecx,8)
-        	prefetcht1	0xdeadbeef(%ebx,%ecx,8)
-        	prefetcht2	0xdeadbeef(%ebx,%ecx,8)
-        	psadbw	%mm3,%mm3
-        	psadbw	%xmm5,%xmm5
-        	rcpps	0xdeadbeef(%ebx,%ecx,8),%xmm5
-        	rcpps	%xmm5,%xmm5
-        	rcpss	0xdeadbeef(%ebx,%ecx,8),%xmm5
-        	rcpss	%xmm5,%xmm5
-        	rsqrtps	0xdeadbeef(%ebx,%ecx,8),%xmm5
-        	rsqrtps	%xmm5,%xmm5
-        	rsqrtss	0xdeadbeef(%ebx,%ecx,8),%xmm5
-        	rsqrtss	%xmm5,%xmm5
-        	sqrtps	0xdeadbeef(%ebx,%ecx,8),%xmm5
-        	sqrtps	%xmm5,%xmm5
-        	sqrtss	0xdeadbeef(%ebx,%ecx,8),%xmm5
-        	sqrtss	%xmm5,%xmm5
-        	stmxcsr	0xdeadbeef(%ebx,%ecx,8)
-        	subps	%xmm5,%xmm5
-        	subss	%xmm5,%xmm5
-        	ucomiss	0xdeadbeef(%ebx,%ecx,8),%xmm5
-        	ucomiss	%xmm5,%xmm5
-        	unpckhps	%xmm5,%xmm5
-        	unpcklps	%xmm5,%xmm5
-        	xorps	%xmm5,%xmm5
-        	addpd	%xmm5,%xmm5
-        	addsd	%xmm5,%xmm5
-        	andnpd	%xmm5,%xmm5
-        	andpd	%xmm5,%xmm5
-        	comisd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-        	comisd	%xmm5,%xmm5
-        	cvtpi2pd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-        	cvtpi2pd	%mm3,%xmm5
-        	cvtsi2sd	%ecx,%xmm5
-        	cvtsi2sd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-        	divpd	%xmm5,%xmm5
-        	divsd	%xmm5,%xmm5
-        	maxpd	%xmm5,%xmm5
-        	maxsd	%xmm5,%xmm5
-        	minpd	%xmm5,%xmm5
-        	minsd	%xmm5,%xmm5
-        	movapd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-        	movapd	%xmm5,%xmm5
-        	movapd	%xmm5,0xdeadbeef(%ebx,%ecx,8)
-        	movapd	%xmm5,%xmm5
-        	movhpd	%xmm5,0xdeadbeef(%ebx,%ecx,8)
-        	movlpd	%xmm5,0xdeadbeef(%ebx,%ecx,8)
-        	movmskpd	%xmm5,%ecx
-        	movntpd	%xmm5,0xdeadbeef(%ebx,%ecx,8)
-        	movsd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-        	movsd	%xmm5,%xmm5
-        	movsd	%xmm5,0xdeadbeef(%ebx,%ecx,8)
-        	movsd	%xmm5,%xmm5
-        	movupd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-        	movupd	%xmm5,%xmm5
-        	movupd	%xmm5,0xdeadbeef(%ebx,%ecx,8)
-        	movupd	%xmm5,%xmm5
-        	mulpd	%xmm5,%xmm5
-        	mulsd	%xmm5,%xmm5
-        	orpd	%xmm5,%xmm5
-        	sqrtpd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-        	sqrtpd	%xmm5,%xmm5
-        	sqrtsd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-        	sqrtsd	%xmm5,%xmm5
-        	subpd	%xmm5,%xmm5
-        	subsd	%xmm5,%xmm5
-        	ucomisd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-        	ucomisd	%xmm5,%xmm5
-        	unpckhpd	%xmm5,%xmm5
-        	unpcklpd	%xmm5,%xmm5
-        	xorpd	%xmm5,%xmm5
-        	cvtdq2pd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-        	cvtdq2pd	%xmm5,%xmm5
-        	cvtpd2dq	0xdeadbeef(%ebx,%ecx,8),%xmm5
-        	cvtpd2dq	%xmm5,%xmm5
-        	cvtdq2ps	0xdeadbeef(%ebx,%ecx,8),%xmm5
-        	cvtdq2ps	%xmm5,%xmm5
-        	cvtpd2pi	0xdeadbeef(%ebx,%ecx,8),%mm3
-        	cvtpd2pi	%xmm5,%mm3
-        	cvtps2dq	0xdeadbeef(%ebx,%ecx,8),%xmm5
-        	cvtps2dq	%xmm5,%xmm5
-        	cvtsd2ss	0xdeadbeef(%ebx,%ecx,8),%xmm5
-        	cvtsd2ss	%xmm5,%xmm5
-        	cvtss2sd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-        	cvtss2sd	%xmm5,%xmm5
-        	cvttpd2pi	0xdeadbeef(%ebx,%ecx,8),%mm3
-        	cvttpd2pi	%xmm5,%mm3
-        	cvttsd2si	0xdeadbeef(%ebx,%ecx,8),%ecx
-        	cvttsd2si	%xmm5,%ecx
-        	maskmovdqu	%xmm5,%xmm5
-        	movdqa	0xdeadbeef(%ebx,%ecx,8),%xmm5
-        	movdqa	%xmm5,%xmm5
-        	movdqa	%xmm5,0xdeadbeef(%ebx,%ecx,8)
-        	movdqa	%xmm5,%xmm5
-        	movdqu	0xdeadbeef(%ebx,%ecx,8),%xmm5
-        	movdqu	%xmm5,0xdeadbeef(%ebx,%ecx,8)
-        	movdq2q	%xmm5,%mm3
-        	movq2dq	%mm3,%xmm5
-        	pmuludq	%mm3,%mm3
-        	pmuludq	%xmm5,%xmm5
-        	pslldq	$0x7f,%xmm5
-        	psrldq	$0x7f,%xmm5
-        	punpckhqdq	%xmm5,%xmm5
-        	punpcklqdq	%xmm5,%xmm5
-        	addsubpd	%xmm5,%xmm5
-        	addsubps	%xmm5,%xmm5
-        	haddpd	%xmm5,%xmm5
-        	haddps	%xmm5,%xmm5
-        	hsubpd	%xmm5,%xmm5
-        	hsubps	%xmm5,%xmm5
-        	lddqu	0xdeadbeef(%ebx,%ecx,8),%xmm5
-        	movddup	0xdeadbeef(%ebx,%ecx,8),%xmm5
-        	movddup	%xmm5,%xmm5
-        	movshdup	0xdeadbeef(%ebx,%ecx,8),%xmm5
-        	movshdup	%xmm5,%xmm5
-        	movsldup	0xdeadbeef(%ebx,%ecx,8),%xmm5
-        	movsldup	%xmm5,%xmm5
-        	phaddw	%mm3,%mm3
-        	phaddw	%xmm5,%xmm5
-        	phaddd	%mm3,%mm3
-        	phaddd	%xmm5,%xmm5
-        	phaddsw	%mm3,%mm3
-        	phaddsw	%xmm5,%xmm5
-        	phsubw	%mm3,%mm3
-        	phsubw	%xmm5,%xmm5
-        	phsubd	%mm3,%mm3
-        	phsubd	%xmm5,%xmm5
-        	phsubsw	%mm3,%mm3
-        	phsubsw	%xmm5,%xmm5
-        	pmaddubsw	%mm3,%mm3
-        	pmaddubsw	%xmm5,%xmm5
-        	pmulhrsw	%mm3,%mm3
-        	pmulhrsw	%xmm5,%xmm5
-        	pshufb	%mm3,%mm3
-        	pshufb	%xmm5,%xmm5
-        	psignb	%mm3,%mm3
-        	psignb	%xmm5,%xmm5
-        	psignw	%mm3,%mm3
-        	psignw	%xmm5,%xmm5
-        	psignd	%mm3,%mm3
-        	psignd	%xmm5,%xmm5
-        	pabsb	0xdeadbeef(%ebx,%ecx,8),%mm3
-        	pabsb	%mm3,%mm3
-        	pabsb	0xdeadbeef(%ebx,%ecx,8),%xmm5
-        	pabsb	%xmm5,%xmm5
-        	pabsw	0xdeadbeef(%ebx,%ecx,8),%mm3
-        	pabsw	%mm3,%mm3
-        	pabsw	0xdeadbeef(%ebx,%ecx,8),%xmm5
-        	pabsw	%xmm5,%xmm5
-        	pabsd	0xdeadbeef(%ebx,%ecx,8),%mm3
-        	pabsd	%mm3,%mm3
-        	pabsd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-        	pabsd	%xmm5,%xmm5
-        	femms
-        	packusdw	%xmm5,%xmm5
-        	pcmpeqq	%xmm5,%xmm5
-        	phminposuw	0xdeadbeef(%ebx,%ecx,8),%xmm5
-        	phminposuw	%xmm5,%xmm5
-        	pmaxsb	%xmm5,%xmm5
-        	pmaxsd	%xmm5,%xmm5
-        	pmaxud	%xmm5,%xmm5
-        	pmaxuw	%xmm5,%xmm5
-        	pminsb	%xmm5,%xmm5
-        	pminsd	%xmm5,%xmm5
-        	pminud	%xmm5,%xmm5
-        	pminuw	%xmm5,%xmm5
-        	pmovsxbw	0xdeadbeef(%ebx,%ecx,8),%xmm5
-        	pmovsxbw	%xmm5,%xmm5
-        	pmovsxbd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-        	pmovsxbd	%xmm5,%xmm5
-        	pmovsxbq	0xdeadbeef(%ebx,%ecx,8),%xmm5
-        	pmovsxbq	%xmm5,%xmm5
-        	pmovsxwd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-        	pmovsxwd	%xmm5,%xmm5
-        	pmovsxwq	0xdeadbeef(%ebx,%ecx,8),%xmm5
-        	pmovsxwq	%xmm5,%xmm5
-        	pmovsxdq	0xdeadbeef(%ebx,%ecx,8),%xmm5
-        	pmovsxdq	%xmm5,%xmm5
-        	pmovzxbw	0xdeadbeef(%ebx,%ecx,8),%xmm5
-        	pmovzxbw	%xmm5,%xmm5
-        	pmovzxbd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-        	pmovzxbd	%xmm5,%xmm5
-        	pmovzxbq	0xdeadbeef(%ebx,%ecx,8),%xmm5
-        	pmovzxbq	%xmm5,%xmm5
-        	pmovzxwd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-        	pmovzxwd	%xmm5,%xmm5
-        	pmovzxwq	0xdeadbeef(%ebx,%ecx,8),%xmm5
-        	pmovzxwq	%xmm5,%xmm5
-        	pmovzxdq	0xdeadbeef(%ebx,%ecx,8),%xmm5
-        	pmovzxdq	%xmm5,%xmm5
-        	pmuldq	%xmm5,%xmm5
-        	pmulld	%xmm5,%xmm5
-        	ptest	0xdeadbeef(%ebx,%ecx,8),%xmm5
-        	ptest	%xmm5,%xmm5
-        	pcmpgtq	%xmm5,%xmm5
diff --git a/test/MC/MachO/darwin-x86_64-diff-relocs.s b/test/MC/MachO/darwin-x86_64-diff-relocs.s
new file mode 100644
index 0000000..38fa074
--- /dev/null
+++ b/test/MC/MachO/darwin-x86_64-diff-relocs.s
@@ -0,0 +1,329 @@
+// RUN: llvm-mc -triple x86_64-apple-darwin10 %s -filetype=obj -o - | macho-dump --dump-section-data | FileCheck %s
+
+        .text
+
+// FIXME: llvm-mc doesn't handle this in a way we can make compatible with 'as',
+// currently, because of how we handle assembler variables.
+//
+// See <rdar://problem/7763719> improve handling of absolute symbols
+
+// _baz = 4
+
+_foo:
+        xorl %eax,%eax
+_g0:
+        xorl %eax,%eax
+L0:
+        jmp 4
+//        jmp _baz
+
+// FIXME: Darwin 'as' for historical reasons widens this jump, but doesn't emit
+// a relocation. It seems like 'as' widens any jump that is not to a temporary,
+// which is inherited from the x86_32 behavior, even though x86_64 could do
+// better.
+//        jmp _g0
+
+        jmp L0
+        jmp _g1
+
+// FIXME: Darwin 'as' gets this wrong as well, even though it could get it right
+// given the other things we do on x86_64. It is using a short jump here. This
+// is probably fallout of the hack that exists for x86_32.
+//        jmp L1
+
+// FIXME: We don't support this, and would currently get it wrong, it should be a jump to an absolute address.
+//        jmp L0 - _g0
+
+//        jmp _g1 - _g0
+// FIXME: Darwin 'as' comes up with 'SIGNED' here instead of 'BRANCH'.
+//        jmp _g1 - L1
+// FIXME: Darwin 'as' gets this completely wrong. It ends up with a single
+// branch relocation. Fallout from the other delta hack?
+//        jmp L1 - _g0
+
+        jmp _g2
+        jmp L2
+        jmp _g3
+        jmp L3
+// FIXME: Darwin 'as' gets this completely wrong. It ends up with a single
+// branch relocation. Fallout from the other delta hack?
+//        jmp L2 - _g3
+//        jmp _g3 - _g2
+// FIXME: Darwin 'as' comes up with 'SIGNED' here instead of 'BRANCH'.
+//        jmp _g3 - L3
+// FIXME: Darwin 'as' gets this completely wrong. It ends up with a single
+// branch relocation. Fallout from the other delta hack?
+//        jmp L3 - _g2
+
+        movl %eax,4(%rip)
+//        movl %eax,_baz(%rip)
+        movl %eax,_g0(%rip)
+        movl %eax,L0(%rip)
+        movl %eax,_g1(%rip)
+        movl %eax,L1(%rip)
+
+// FIXME: Darwin 'as' gets most of these wrong, and there is an ambiguity in ATT
+// syntax in what they should mean in the first place (absolute or
+// rip-relative address).
+//        movl %eax,L0 - _g0(%rip)
+//        movl %eax,_g1 - _g0(%rip)
+//        movl %eax,_g1 - L1(%rip)
+//        movl %eax,L1 - _g0(%rip)
+
+        movl %eax,_g2(%rip)
+        movl %eax,L2(%rip)
+        movl %eax,_g3(%rip)
+        movl %eax,L3(%rip)
+
+// FIXME: Darwin 'as' gets most of these wrong, and there is an ambiguity in ATT
+// syntax in what they should mean in the first place (absolute or
+// rip-relative address).
+//        movl %eax,L2 - _g2(%rip)
+//        movl %eax,_g3 - _g2(%rip)
+//        movl %eax,_g3 - L3(%rip)
+//        movl %eax,L3 - _g2(%rip)
+
+_g1:
+        xorl %eax,%eax
+L1:
+        xorl %eax,%eax
+
+        .data
+_g2:
+        xorl %eax,%eax
+L2:
+        .quad 4
+//        .quad _baz
+        .quad _g2
+        .quad L2
+        .quad _g3
+        .quad L3
+        .quad L2 - _g2
+        .quad _g3 - _g2
+        .quad L3 - _g2
+        .quad L3 - _g3
+
+        .quad _g0
+        .quad L0
+        .quad _g1
+        .quad L1
+        .quad L0 - _g0
+        .quad _g1 - _g0
+        .quad L1 - _g0
+        .quad L1 - _g1
+
+_g3:
+        xorl %eax,%eax
+L3:
+        xorl %eax,%eax
+
+// CHECK: ('cputype', 16777223)
+// CHECK: ('cpusubtype', 3)
+// CHECK: ('filetype', 1)
+// CHECK: ('num_load_commands', 1)
+// CHECK: ('load_commands_size', 336)
+// CHECK: ('flag', 0)
+// CHECK: ('reserved', 0)
+// CHECK: ('load_commands', [
+// CHECK:   # Load Command 0
+// CHECK:  (('command', 25)
+// CHECK:   ('size', 232)
+// CHECK:   ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK:   ('vm_addr', 0)
+// CHECK:   ('vm_size', 236)
+// CHECK:   ('file_offset', 368)
+// CHECK:   ('file_size', 236)
+// CHECK:   ('maxprot', 7)
+// CHECK:   ('initprot', 7)
+// CHECK:   ('num_sections', 2)
+// CHECK:   ('flags', 0)
+// CHECK:   ('sections', [
+// CHECK:     # Section 0
+// CHECK:    (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK:     ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK:     ('address', 0)
+// CHECK:     ('size', 94)
+// CHECK:     ('offset', 368)
+// CHECK:     ('alignment', 0)
+// CHECK:     ('reloc_offset', 604)
+// CHECK:     ('num_reloc', 12)
+// CHECK:     ('flags', 0x80000400)
+// CHECK:     ('reserved1', 0)
+// CHECK:     ('reserved2', 0)
+// CHECK:     ('reserved3', 0)
+// CHECK:    ),
+// CHECK:   ('_relocations', [
+
+// FIXME: Unfortunately, we do not get these relocations in exactly the same
+// order as Darwin 'as'. It turns out that 'as' *usually* ends up emitting
+// them in reverse address order, but sometimes it allocates some
+// additional relocations late so these end up preceed the other entries. I
+// haven't figured out the exact criteria for this yet.
+        
+// CHECK:     (('word-0', 0x56),
+// CHECK:      ('word-1', 0x1d000004)),
+// CHECK:     (('word-0', 0x50),
+// CHECK:      ('word-1', 0x1d000004)),
+// CHECK:     (('word-0', 0x4a),
+// CHECK:      ('word-1', 0x1d000003)),
+// CHECK:     (('word-0', 0x44),
+// CHECK:      ('word-1', 0x1d000003)),
+// CHECK:     (('word-0', 0x3e),
+// CHECK:      ('word-1', 0x1d000002)),
+// CHECK:     (('word-0', 0x38),
+// CHECK:      ('word-1', 0x1d000002)),
+// CHECK:     (('word-0', 0x20),
+// CHECK:      ('word-1', 0x2d000004)),
+// CHECK:     (('word-0', 0x1b),
+// CHECK:      ('word-1', 0x2d000004)),
+// CHECK:     (('word-0', 0x16),
+// CHECK:      ('word-1', 0x2d000003)),
+// CHECK:     (('word-0', 0x11),
+// CHECK:      ('word-1', 0x2d000003)),
+// CHECK:     (('word-0', 0xc),
+// CHECK:      ('word-1', 0x2d000002)),
+// CHECK:     (('word-0', 0x5),
+// CHECK:      ('word-1', 0x2d000000)),
+// CHECK:   ])
+// CHECK:     # Section 1
+// CHECK:    (('section_name', '__data\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK:     ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK:     ('address', 94)
+// CHECK:     ('size', 142)
+// CHECK:     ('offset', 462)
+// CHECK:     ('alignment', 0)
+// CHECK:     ('reloc_offset', 700)
+// CHECK:     ('num_reloc', 16)
+// CHECK:     ('flags', 0x400)
+// CHECK:     ('reserved1', 0)
+// CHECK:     ('reserved2', 0)
+// CHECK:     ('reserved3', 0)
+// CHECK:    ),
+// CHECK:   ('_relocations', [
+// CHECK:     # Relocation 0
+// CHECK:     (('word-0', 0x7a),
+// CHECK:      ('word-1', 0x5e000001)),
+// CHECK:     # Relocation 1
+// CHECK:     (('word-0', 0x7a),
+// CHECK:      ('word-1', 0xe000002)),
+// CHECK:     # Relocation 2
+// CHECK:     (('word-0', 0x72),
+// CHECK:      ('word-1', 0x5e000001)),
+// CHECK:     # Relocation 3
+// CHECK:     (('word-0', 0x72),
+// CHECK:      ('word-1', 0xe000002)),
+// CHECK:     # Relocation 4
+// CHECK:     (('word-0', 0x62),
+// CHECK:      ('word-1', 0xe000002)),
+// CHECK:     # Relocation 5
+// CHECK:     (('word-0', 0x5a),
+// CHECK:      ('word-1', 0xe000002)),
+// CHECK:     # Relocation 6
+// CHECK:     (('word-0', 0x52),
+// CHECK:      ('word-1', 0xe000001)),
+// CHECK:     # Relocation 7
+// CHECK:     (('word-0', 0x4a),
+// CHECK:      ('word-1', 0xe000001)),
+// CHECK:     # Relocation 8
+// CHECK:     (('word-0', 0x3a),
+// CHECK:      ('word-1', 0x5e000003)),
+// CHECK:     # Relocation 9
+// CHECK:     (('word-0', 0x3a),
+// CHECK:      ('word-1', 0xe000004)),
+// CHECK:     # Relocation 10
+// CHECK:     (('word-0', 0x32),
+// CHECK:      ('word-1', 0x5e000003)),
+// CHECK:     # Relocation 11
+// CHECK:     (('word-0', 0x32),
+// CHECK:      ('word-1', 0xe000004)),
+// CHECK:     # Relocation 12
+// CHECK:     (('word-0', 0x22),
+// CHECK:      ('word-1', 0xe000004)),
+// CHECK:     # Relocation 13
+// CHECK:     (('word-0', 0x1a),
+// CHECK:      ('word-1', 0xe000004)),
+// CHECK:     # Relocation 14
+// CHECK:     (('word-0', 0x12),
+// CHECK:      ('word-1', 0xe000003)),
+// CHECK:     # Relocation 15
+// CHECK:     (('word-0', 0xa),
+// CHECK:      ('word-1', 0xe000003)),
+// CHECK:   ])
+// CHECK:   ])
+// CHECK:  ),
+// CHECK:   # Load Command 1
+// CHECK:  (('command', 2)
+// CHECK:   ('size', 24)
+// CHECK:   ('symoff', 828)
+// CHECK:   ('nsyms', 5)
+// CHECK:   ('stroff', 908)
+// CHECK:   ('strsize', 24)
+// CHECK:   ('_string_data', '\x00_foo\x00_g0\x00_g1\x00_g2\x00_g3\x00\x00\x00')
+// CHECK:   ('_symbols', [
+// CHECK:     # Symbol 0
+// CHECK:    (('n_strx', 1)
+// CHECK:     ('n_type', 0xe)
+// CHECK:     ('n_sect', 1)
+// CHECK:     ('n_desc', 0)
+// CHECK:     ('n_value', 0)
+// CHECK:     ('_string', '_foo')
+// CHECK:    ),
+// CHECK:     # Symbol 1
+// CHECK:    (('n_strx', 6)
+// CHECK:     ('n_type', 0xe)
+// CHECK:     ('n_sect', 1)
+// CHECK:     ('n_desc', 0)
+// CHECK:     ('n_value', 2)
+// CHECK:     ('_string', '_g0')
+// CHECK:    ),
+// CHECK:     # Symbol 2
+// CHECK:    (('n_strx', 10)
+// CHECK:     ('n_type', 0xe)
+// CHECK:     ('n_sect', 1)
+// CHECK:     ('n_desc', 0)
+// CHECK:     ('n_value', 90)
+// CHECK:     ('_string', '_g1')
+// CHECK:    ),
+// CHECK:     # Symbol 3
+// CHECK:    (('n_strx', 14)
+// CHECK:     ('n_type', 0xe)
+// CHECK:     ('n_sect', 2)
+// CHECK:     ('n_desc', 0)
+// CHECK:     ('n_value', 94)
+// CHECK:     ('_string', '_g2')
+// CHECK:    ),
+// CHECK:     # Symbol 4
+// CHECK:    (('n_strx', 18)
+// CHECK:     ('n_type', 0xe)
+// CHECK:     ('n_sect', 2)
+// CHECK:     ('n_desc', 0)
+// CHECK:     ('n_value', 232)
+// CHECK:     ('_string', '_g3')
+// CHECK:    ),
+// CHECK:   ])
+// CHECK:  ),
+// CHECK:   # Load Command 2
+// CHECK:  (('command', 11)
+// CHECK:   ('size', 80)
+// CHECK:   ('ilocalsym', 0)
+// CHECK:   ('nlocalsym', 5)
+// CHECK:   ('iextdefsym', 5)
+// CHECK:   ('nextdefsym', 0)
+// CHECK:   ('iundefsym', 5)
+// CHECK:   ('nundefsym', 0)
+// CHECK:   ('tocoff', 0)
+// CHECK:   ('ntoc', 0)
+// CHECK:   ('modtaboff', 0)
+// CHECK:   ('nmodtab', 0)
+// CHECK:   ('extrefsymoff', 0)
+// CHECK:   ('nextrefsyms', 0)
+// CHECK:   ('indirectsymoff', 0)
+// CHECK:   ('nindirectsyms', 0)
+// CHECK:   ('extreloff', 0)
+// CHECK:   ('nextrel', 0)
+// CHECK:   ('locreloff', 0)
+// CHECK:   ('nlocrel', 0)
+// CHECK:   ('_indirect_symbols', [
+// CHECK:   ])
+// CHECK:  ),
+// CHECK: ])
diff --git a/test/MC/MachO/darwin-x86_64-reloc-offsets.s b/test/MC/MachO/darwin-x86_64-reloc-offsets.s
new file mode 100644
index 0000000..ab6820e
--- /dev/null
+++ b/test/MC/MachO/darwin-x86_64-reloc-offsets.s
@@ -0,0 +1,343 @@
+// RUN: llvm-mc -triple x86_64-apple-darwin10 %s -filetype=obj -o - | macho-dump --dump-section-data | FileCheck %s
+
+        .data
+
+        .org 0x10
+L0:
+        .long 0
+        .long 0
+        .long 0
+        .long 0
+
+_d:
+        .long 0
+L1:
+        .long 0
+
+        .text
+
+// These generate normal x86_64 (external) relocations. They could all use
+// SIGNED, but don't for pedantic compatibility with Darwin 'as'.
+
+        // SIGNED1
+ 	movb  $0x12, _d(%rip)
+
+        // SIGNED
+ 	movb  $0x12, _d + 1(%rip)
+
+        // SIGNED4
+ 	movl  $0x12345678, _d(%rip)
+
+        // SIGNED
+ 	movl  $0x12345678, _d + 1(%rip)
+
+        // SIGNED2
+ 	movl  $0x12345678, _d + 2(%rip)
+
+        // SIGNED1
+ 	movl  $0x12345678, _d + 3(%rip)
+
+        // SIGNED
+ 	movl  $0x12345678, _d + 4(%rip)
+
+	movb  %al, _d(%rip)
+ 	movb  %al, _d + 1(%rip)
+ 	movl  %eax, _d(%rip)
+ 	movl  %eax, _d + 1(%rip)
+ 	movl  %eax, _d + 2(%rip)
+ 	movl  %eax, _d + 3(%rip)
+ 	movl  %eax, _d + 4(%rip)
+
+// These have to use local relocations. Since that uses an offset into the
+// section in x86_64 (as opposed to a scattered relocation), and since the
+// linker can only decode this to an atom + offset by scanning the section,
+// it is not possible to correctly encode these without SIGNED<N>. This is
+// ultimately due to a design flaw in the x86_64 relocation format, it is
+// not possible to encode an address (L<foo> + <constant>) which is outside the
+// atom containing L<foo>.
+
+        // SIGNED1
+ 	movb  $0x12, L0(%rip)
+
+        // SIGNED
+ 	movb  $0x12, L0 + 1(%rip)
+
+        // SIGNED4
+ 	movl  $0x12345678, L0(%rip)
+
+        // SIGNED
+ 	movl  $0x12345678, L0 + 1(%rip)
+
+        // SIGNED2
+ 	movl  $0x12345678, L0 + 2(%rip)
+
+        // SIGNED1
+ 	movl  $0x12345678, L0 + 3(%rip)
+
+        // SIGNED
+ 	movl  $0x12345678, L0 + 4(%rip)
+
+ 	movb  %al, L0(%rip)
+ 	movb  %al, L0 + 1(%rip)
+ 	movl  %eax, L0(%rip)
+ 	movl  %eax, L0 + 1(%rip)
+ 	movl  %eax, L0 + 2(%rip)
+ 	movl  %eax, L0 + 3(%rip)
+ 	movl  %eax, L0 + 4(%rip)
+
+        // SIGNED1
+ 	movb  $0x12, L1(%rip)
+
+        // SIGNED
+ 	movb  $0x12, L1 + 1(%rip)
+
+        // SIGNED4
+ 	movl  $0x12345678, L1(%rip)
+
+        // SIGNED
+ 	movl  $0x12345678, L1 + 1(%rip)
+
+        // SIGNED2
+ 	movl  $0x12345678, L1 + 2(%rip)
+
+        // SIGNED1
+ 	movl  $0x12345678, L1 + 3(%rip)
+
+        // SIGNED
+ 	movl  $0x12345678, L1 + 4(%rip)
+
+ 	movb  %al, L1(%rip)
+ 	movb  %al, L1 + 1(%rip)
+ 	movl  %eax, L1(%rip)
+ 	movl  %eax, L1 + 1(%rip)
+ 	movl  %eax, L1 + 2(%rip)
+ 	movl  %eax, L1 + 3(%rip)
+ 	movl  %eax, L1 + 4(%rip)
+
+// CHECK: ('cputype', 16777223)
+// CHECK: ('cpusubtype', 3)
+// CHECK: ('filetype', 1)
+// CHECK: ('num_load_commands', 1)
+// CHECK: ('load_commands_size', 336)
+// CHECK: ('flag', 0)
+// CHECK: ('reserved', 0)
+// CHECK: ('load_commands', [
+// CHECK:   # Load Command 0
+// CHECK:  (('command', 25)
+// CHECK:   ('size', 232)
+// CHECK:   ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK:   ('vm_addr', 0)
+// CHECK:   ('vm_size', 358)
+// CHECK:   ('file_offset', 368)
+// CHECK:   ('file_size', 358)
+// CHECK:   ('maxprot', 7)
+// CHECK:   ('initprot', 7)
+// CHECK:   ('num_sections', 2)
+// CHECK:   ('flags', 0)
+// CHECK:   ('sections', [
+// CHECK:     # Section 0
+// CHECK:    (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK:     ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK:     ('address', 0)
+// CHECK:     ('size', 318)
+// CHECK:     ('offset', 368)
+// CHECK:     ('alignment', 0)
+// CHECK:     ('reloc_offset', 728)
+// CHECK:     ('num_reloc', 42)
+// CHECK:     ('flags', 0x80000400)
+// CHECK:     ('reserved1', 0)
+// CHECK:     ('reserved2', 0)
+// CHECK:     ('reserved3', 0)
+// CHECK:    ),
+// CHECK:   ('_relocations', [
+// CHECK:     # Relocation 0
+// CHECK:     (('word-0', 0x13a),
+// CHECK:      ('word-1', 0x1d000000)),
+// CHECK:     # Relocation 1
+// CHECK:     (('word-0', 0x134),
+// CHECK:      ('word-1', 0x1d000000)),
+// CHECK:     # Relocation 2
+// CHECK:     (('word-0', 0x12e),
+// CHECK:      ('word-1', 0x1d000000)),
+// CHECK:     # Relocation 3
+// CHECK:     (('word-0', 0x128),
+// CHECK:      ('word-1', 0x1d000000)),
+// CHECK:     # Relocation 4
+// CHECK:     (('word-0', 0x122),
+// CHECK:      ('word-1', 0x1d000000)),
+// CHECK:     # Relocation 5
+// CHECK:     (('word-0', 0x11c),
+// CHECK:      ('word-1', 0x1d000000)),
+// CHECK:     # Relocation 6
+// CHECK:     (('word-0', 0x116),
+// CHECK:      ('word-1', 0x1d000000)),
+// CHECK:     # Relocation 7
+// CHECK:     (('word-0', 0x10c),
+// CHECK:      ('word-1', 0x1d000000)),
+// CHECK:     # Relocation 8
+// CHECK:     (('word-0', 0x102),
+// CHECK:      ('word-1', 0x6d000000)),
+// CHECK:     # Relocation 9
+// CHECK:     (('word-0', 0xf8),
+// CHECK:      ('word-1', 0x7d000000)),
+// CHECK:     # Relocation 10
+// CHECK:     (('word-0', 0xee),
+// CHECK:      ('word-1', 0x1d000000)),
+// CHECK:     # Relocation 11
+// CHECK:     (('word-0', 0xe4),
+// CHECK:      ('word-1', 0x8d000000)),
+// CHECK:     # Relocation 12
+// CHECK:     (('word-0', 0xdd),
+// CHECK:      ('word-1', 0x1d000000)),
+// CHECK:     # Relocation 13
+// CHECK:     (('word-0', 0xd6),
+// CHECK:      ('word-1', 0x6d000000)),
+// CHECK:     # Relocation 14
+// CHECK:     (('word-0', 0xd0),
+// CHECK:      ('word-1', 0x15000002)),
+// CHECK:     # Relocation 15
+// CHECK:     (('word-0', 0xca),
+// CHECK:      ('word-1', 0x15000002)),
+// CHECK:     # Relocation 16
+// CHECK:     (('word-0', 0xc4),
+// CHECK:      ('word-1', 0x15000002)),
+// CHECK:     # Relocation 17
+// CHECK:     (('word-0', 0xbe),
+// CHECK:      ('word-1', 0x15000002)),
+// CHECK:     # Relocation 18
+// CHECK:     (('word-0', 0xb8),
+// CHECK:      ('word-1', 0x15000002)),
+// CHECK:     # Relocation 19
+// CHECK:     (('word-0', 0xb2),
+// CHECK:      ('word-1', 0x15000002)),
+// CHECK:     # Relocation 20
+// CHECK:     (('word-0', 0xac),
+// CHECK:      ('word-1', 0x15000002)),
+// CHECK:     # Relocation 21
+// CHECK:     (('word-0', 0xa2),
+// CHECK:      ('word-1', 0x15000002)),
+// CHECK:     # Relocation 22
+// CHECK:     (('word-0', 0x98),
+// CHECK:      ('word-1', 0x65000002)),
+// CHECK:     # Relocation 23
+// CHECK:     (('word-0', 0x8e),
+// CHECK:      ('word-1', 0x75000002)),
+// CHECK:     # Relocation 24
+// CHECK:     (('word-0', 0x84),
+// CHECK:      ('word-1', 0x15000002)),
+// CHECK:     # Relocation 25
+// CHECK:     (('word-0', 0x7a),
+// CHECK:      ('word-1', 0x85000002)),
+// CHECK:     # Relocation 26
+// CHECK:     (('word-0', 0x73),
+// CHECK:      ('word-1', 0x15000002)),
+// CHECK:     # Relocation 27
+// CHECK:     (('word-0', 0x6c),
+// CHECK:      ('word-1', 0x65000002)),
+// CHECK:     # Relocation 28
+// CHECK:     (('word-0', 0x66),
+// CHECK:      ('word-1', 0x1d000000)),
+// CHECK:     # Relocation 29
+// CHECK:     (('word-0', 0x60),
+// CHECK:      ('word-1', 0x1d000000)),
+// CHECK:     # Relocation 30
+// CHECK:     (('word-0', 0x5a),
+// CHECK:      ('word-1', 0x1d000000)),
+// CHECK:     # Relocation 31
+// CHECK:     (('word-0', 0x54),
+// CHECK:      ('word-1', 0x1d000000)),
+// CHECK:     # Relocation 32
+// CHECK:     (('word-0', 0x4e),
+// CHECK:      ('word-1', 0x1d000000)),
+// CHECK:     # Relocation 33
+// CHECK:     (('word-0', 0x48),
+// CHECK:      ('word-1', 0x1d000000)),
+// CHECK:     # Relocation 34
+// CHECK:     (('word-0', 0x42),
+// CHECK:      ('word-1', 0x1d000000)),
+// CHECK:     # Relocation 35
+// CHECK:     (('word-0', 0x38),
+// CHECK:      ('word-1', 0x1d000000)),
+// CHECK:     # Relocation 36
+// CHECK:     (('word-0', 0x2e),
+// CHECK:      ('word-1', 0x6d000000)),
+// CHECK:     # Relocation 37
+// CHECK:     (('word-0', 0x24),
+// CHECK:      ('word-1', 0x7d000000)),
+// CHECK:     # Relocation 38
+// CHECK:     (('word-0', 0x1a),
+// CHECK:      ('word-1', 0x1d000000)),
+// CHECK:     # Relocation 39
+// CHECK:     (('word-0', 0x10),
+// CHECK:      ('word-1', 0x8d000000)),
+// CHECK:     # Relocation 40
+// CHECK:     (('word-0', 0x9),
+// CHECK:      ('word-1', 0x1d000000)),
+// CHECK:     # Relocation 41
+// CHECK:     (('word-0', 0x2),
+// CHECK:      ('word-1', 0x6d000000)),
+// CHECK:   ])
+// CHECK:   ('_section_data', '\xc6\x05\xff\xff\xff\xff\x12\xc6\x05\x00\x00\x00\x00\x12\xc7\x05\xfc\xff\xff\xffxV4\x12\xc7\x05\xfd\xff\xff\xffxV4\x12\xc7\x05\xfe\xff\xff\xffxV4\x12\xc7\x05\xff\xff\xff\xffxV4\x12\xc7\x05\x00\x00\x00\x00xV4\x12\x88\x05\x00\x00\x00\x00\x88\x05\x01\x00\x00\x00\x89\x05\x00\x00\x00\x00\x89\x05\x01\x00\x00\x00\x89\x05\x02\x00\x00\x00\x89\x05\x03\x00\x00\x00\x89\x05\x04\x00\x00\x00\xc6\x05\xdd\x00\x00\x00\x12\xc6\x05\xd7\x00\x00\x00\x12\xc7\x05\xcc\x00\x00\x00xV4\x12\xc7\x05\xc3\x00\x00\x00xV4\x12\xc7\x05\xba\x00\x00\x00xV4\x12\xc7\x05\xb1\x00\x00\x00xV4\x12\xc7\x05\xa8\x00\x00\x00xV4\x12\x88\x05\x9e\x00\x00\x00\x88\x05\x99\x00\x00\x00\x89\x05\x92\x00\x00\x00\x89\x05\x8d\x00\x00\x00\x89\x05\x88\x00\x00\x00\x89\x05\x83\x00\x00\x00\x89\x05~\x00\x00\x00\xc6\x05\x03\x00\x00\x00\x12\xc6\x05\x04\x00\x00\x00\x12\xc7\x05\x00\x00\x00\x00xV4\x12\xc7\x05\x01\x00\x00\x00xV4\x12\xc7\x05\x02\x00\x00\x00xV4\x12\xc7\x05\x03\x00\x00\x00xV4\x12\xc7\x05\x04\x00\x00\x00xV4\x12\x88\x05\x04\x00\x00\x00\x88\x05\x05\x00\x00\x00\x89\x05\x04\x00\x00\x00\x89\x05\x05\x00\x00\x00\x89\x05\x06\x00\x00\x00\x89\x05\x07\x00\x00\x00\x89\x05\x08\x00\x00\x00')
+// CHECK:     # Section 1
+// CHECK:    (('section_name', '__data\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK:     ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK:     ('address', 318)
+// CHECK:     ('size', 40)
+// CHECK:     ('offset', 686)
+// CHECK:     ('alignment', 0)
+// CHECK:     ('reloc_offset', 0)
+// CHECK:     ('num_reloc', 0)
+// CHECK:     ('flags', 0x0)
+// CHECK:     ('reserved1', 0)
+// CHECK:     ('reserved2', 0)
+// CHECK:     ('reserved3', 0)
+// CHECK:    ),
+// CHECK:   ('_relocations', [
+// CHECK:   ])
+// CHECK:   ('_section_data', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK:   ])
+// CHECK:  ),
+// CHECK:   # Load Command 1
+// CHECK:  (('command', 2)
+// CHECK:   ('size', 24)
+// CHECK:   ('symoff', 1064)
+// CHECK:   ('nsyms', 1)
+// CHECK:   ('stroff', 1080)
+// CHECK:   ('strsize', 4)
+// CHECK:   ('_string_data', '\x00_d\x00')
+// CHECK:   ('_symbols', [
+// CHECK:     # Symbol 0
+// CHECK:    (('n_strx', 1)
+// CHECK:     ('n_type', 0xe)
+// CHECK:     ('n_sect', 2)
+// CHECK:     ('n_desc', 0)
+// CHECK:     ('n_value', 350)
+// CHECK:     ('_string', '_d')
+// CHECK:    ),
+// CHECK:   ])
+// CHECK:  ),
+// CHECK:   # Load Command 2
+// CHECK:  (('command', 11)
+// CHECK:   ('size', 80)
+// CHECK:   ('ilocalsym', 0)
+// CHECK:   ('nlocalsym', 1)
+// CHECK:   ('iextdefsym', 1)
+// CHECK:   ('nextdefsym', 0)
+// CHECK:   ('iundefsym', 1)
+// CHECK:   ('nundefsym', 0)
+// CHECK:   ('tocoff', 0)
+// CHECK:   ('ntoc', 0)
+// CHECK:   ('modtaboff', 0)
+// CHECK:   ('nmodtab', 0)
+// CHECK:   ('extrefsymoff', 0)
+// CHECK:   ('nextrefsyms', 0)
+// CHECK:   ('indirectsymoff', 0)
+// CHECK:   ('nindirectsyms', 0)
+// CHECK:   ('extreloff', 0)
+// CHECK:   ('nextrel', 0)
+// CHECK:   ('locreloff', 0)
+// CHECK:   ('nlocrel', 0)
+// CHECK:   ('_indirect_symbols', [
+// CHECK:   ])
+// CHECK:  ),
+// CHECK: ])
diff --git a/test/MC/MachO/darwin-x86_64-reloc.s b/test/MC/MachO/darwin-x86_64-reloc.s
new file mode 100644
index 0000000..6b325b0
--- /dev/null
+++ b/test/MC/MachO/darwin-x86_64-reloc.s
@@ -0,0 +1,229 @@
+// RUN: llvm-mc -triple x86_64-apple-darwin9 %s -filetype=obj -o - | macho-dump --dump-section-data | FileCheck %s
+
+// These examples are taken from <mach-o/x86_64/reloc.h>.
+
+        .text
+_foo:
+        ret
+
+_baz:
+        call _foo
+ 	call _foo+4
+ 	movq _foo@GOTPCREL(%rip), %rax
+ 	pushq _foo@GOTPCREL(%rip)
+ 	movl _foo(%rip), %eax
+ 	movl _foo+4(%rip), %eax
+ 	movb  $0x12, _foo(%rip)
+ 	movl  $0x12345678, _foo(%rip)
+ 	.quad _foo
+_bar:
+ 	.quad _foo+4
+ 	.quad _foo - _bar
+ 	.quad _foo - _bar + 4
+ 	.long _foo - _bar
+ 	leaq L1(%rip), %rax
+ 	leaq L0(%rip), %rax
+        addl $6,L0(%rip)
+        addw $500,L0(%rip)
+        addl $500,L0(%rip)
+
+_prev:
+        .space 12,0x90
+ 	.quad L1
+L0:
+        .quad L0
+L_pc:
+ 	.quad _foo - L_pc
+ 	.quad _foo - L1
+L1:
+ 	.quad L1 - _prev
+
+// CHECK: ('cputype', 16777223)
+// CHECK: ('cpusubtype', 3)
+// CHECK: ('filetype', 1)
+// CHECK: ('num_load_commands', 1)
+// CHECK: ('load_commands_size', 256)
+// CHECK: ('flag', 0)
+// CHECK: ('reserved', 0)
+// CHECK: ('load_commands', [
+// CHECK:   # Load Command 0
+// CHECK:  (('command', 25)
+// CHECK:   ('size', 152)
+// CHECK:   ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK:   ('vm_addr', 0)
+// CHECK:   ('vm_size', 181)
+// CHECK:   ('file_offset', 288)
+// CHECK:   ('file_size', 181)
+// CHECK:   ('maxprot', 7)
+// CHECK:   ('initprot', 7)
+// CHECK:   ('num_sections', 1)
+// CHECK:   ('flags', 0)
+// CHECK:   ('sections', [
+// CHECK:     # Section 0
+// CHECK:    (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK:     ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK:     ('address', 0)
+// CHECK:     ('size', 181)
+// CHECK:     ('offset', 288)
+// CHECK:     ('alignment', 0)
+// CHECK:     ('reloc_offset', 472)
+// CHECK:     ('num_reloc', 27)
+// CHECK:     ('flags', 0x80000400)
+// CHECK:     ('reserved1', 0)
+// CHECK:     ('reserved2', 0)
+// CHECK:     ('reserved3', 0)
+// CHECK:    ),
+// CHECK:   ('_relocations', [
+// CHECK:     # Relocation 0
+// CHECK:     (('word-0', 0xa5),
+// CHECK:      ('word-1', 0x5e000003)),
+// CHECK:     # Relocation 1
+// CHECK:     (('word-0', 0xa5),
+// CHECK:      ('word-1', 0xe000000)),
+// CHECK:     # Relocation 2
+// CHECK:     (('word-0', 0x9d),
+// CHECK:      ('word-1', 0x5e000003)),
+// CHECK:     # Relocation 3
+// CHECK:     (('word-0', 0x9d),
+// CHECK:      ('word-1', 0xe000000)),
+// CHECK:     # Relocation 4
+// CHECK:     (('word-0', 0x95),
+// CHECK:      ('word-1', 0xe000003)),
+// CHECK:     # Relocation 5
+// CHECK:     (('word-0', 0x8d),
+// CHECK:      ('word-1', 0xe000003)),
+// CHECK:     # Relocation 6
+// CHECK:     (('word-0', 0x79),
+// CHECK:      ('word-1', 0x8d000003)),
+// CHECK:     # Relocation 7
+// CHECK:     (('word-0', 0x71),
+// CHECK:      ('word-1', 0x7d000003)),
+// CHECK:     # Relocation 8
+// CHECK:     (('word-0', 0x69),
+// CHECK:      ('word-1', 0x6d000003)),
+// CHECK:     # Relocation 9
+// CHECK:     (('word-0', 0x63),
+// CHECK:      ('word-1', 0x1d000003)),
+// CHECK:     # Relocation 10
+// CHECK:     (('word-0', 0x5c),
+// CHECK:      ('word-1', 0x1d000003)),
+// CHECK:     # Relocation 11
+// CHECK:     (('word-0', 0x55),
+// CHECK:      ('word-1', 0x5c000002)),
+// CHECK:     # Relocation 12
+// CHECK:     (('word-0', 0x55),
+// CHECK:      ('word-1', 0xc000000)),
+// CHECK:     # Relocation 13
+// CHECK:     (('word-0', 0x4d),
+// CHECK:      ('word-1', 0x5e000002)),
+// CHECK:     # Relocation 14
+// CHECK:     (('word-0', 0x4d),
+// CHECK:      ('word-1', 0xe000000)),
+// CHECK:     # Relocation 15
+// CHECK:     (('word-0', 0x45),
+// CHECK:      ('word-1', 0x5e000002)),
+// CHECK:     # Relocation 16
+// CHECK:     (('word-0', 0x45),
+// CHECK:      ('word-1', 0xe000000)),
+// CHECK:     # Relocation 17
+// CHECK:     (('word-0', 0x3d),
+// CHECK:      ('word-1', 0xe000000)),
+// CHECK:     # Relocation 18
+// CHECK:     (('word-0', 0x35),
+// CHECK:      ('word-1', 0xe000000)),
+// CHECK:     # Relocation 19
+// CHECK:     (('word-0', 0x2d),
+// CHECK:      ('word-1', 0x8d000000)),
+// CHECK:     # Relocation 20
+// CHECK:     (('word-0', 0x26),
+// CHECK:      ('word-1', 0x6d000000)),
+// CHECK:     # Relocation 21
+// CHECK:     (('word-0', 0x20),
+// CHECK:      ('word-1', 0x1d000000)),
+// CHECK:     # Relocation 22
+// CHECK:     (('word-0', 0x1a),
+// CHECK:      ('word-1', 0x1d000000)),
+// CHECK:     # Relocation 23
+// CHECK:     (('word-0', 0x14),
+// CHECK:      ('word-1', 0x4d000000)),
+// CHECK:     # Relocation 24
+// CHECK:     (('word-0', 0xe),
+// CHECK:      ('word-1', 0x3d000000)),
+// CHECK:     # Relocation 25
+// CHECK:     (('word-0', 0x7),
+// CHECK:      ('word-1', 0x2d000000)),
+// CHECK:     # Relocation 26
+// CHECK:     (('word-0', 0x2),
+// CHECK:      ('word-1', 0x2d000000)),
+// CHECK:   ])
+// CHECK:   ('_section_data', '\xc3\xe8\x00\x00\x00\x00\xe8\x04\x00\x00\x00H\x8b\x05\x00\x00\x00\x00\xff5\x00\x00\x00\x00\x8b\x05\x00\x00\x00\x00\x8b\x05\x04\x00\x00\x00\xc6\x05\xff\xff\xff\xff\x12\xc7\x05\xfc\xff\xff\xffxV4\x12\x00\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00H\x8d\x05,\x00\x00\x00H\x8d\x05\x14\x00\x00\x00\x83\x05\x13\x00\x00\x00\x06f\x81\x05\x12\x00\x00\x00\xf4\x01\x81\x05\x10\x00\x00\x00\xf4\x01\x00\x00\x90\x90\x90\x90\x90\x90\x90\x90\x90\x90\x90\x90,\x00\x00\x00\x00\x00\x00\x00\x14\x00\x00\x00\x00\x00\x00\x00\xe4\xff\xff\xff\xff\xff\xff\xff\xd4\xff\xff\xff\xff\xff\xff\xff,\x00\x00\x00\x00\x00\x00\x00')
+// CHECK:   ])
+// CHECK:  ),
+// CHECK:   # Load Command 1
+// CHECK:  (('command', 2)
+// CHECK:   ('size', 24)
+// CHECK:   ('symoff', 688)
+// CHECK:   ('nsyms', 4)
+// CHECK:   ('stroff', 752)
+// CHECK:   ('strsize', 24)
+// CHECK:   ('_string_data', '\x00_foo\x00_baz\x00_bar\x00_prev\x00\x00\x00')
+// CHECK:   ('_symbols', [
+// CHECK:     # Symbol 0
+// CHECK:    (('n_strx', 1)
+// CHECK:     ('n_type', 0xe)
+// CHECK:     ('n_sect', 1)
+// CHECK:     ('n_desc', 0)
+// CHECK:     ('n_value', 0)
+// CHECK:     ('_string', '_foo')
+// CHECK:    ),
+// CHECK:     # Symbol 1
+// CHECK:    (('n_strx', 6)
+// CHECK:     ('n_type', 0xe)
+// CHECK:     ('n_sect', 1)
+// CHECK:     ('n_desc', 0)
+// CHECK:     ('n_value', 1)
+// CHECK:     ('_string', '_baz')
+// CHECK:    ),
+// CHECK:     # Symbol 2
+// CHECK:    (('n_strx', 11)
+// CHECK:     ('n_type', 0xe)
+// CHECK:     ('n_sect', 1)
+// CHECK:     ('n_desc', 0)
+// CHECK:     ('n_value', 61)
+// CHECK:     ('_string', '_bar')
+// CHECK:    ),
+// CHECK:     # Symbol 3
+// CHECK:    (('n_strx', 16)
+// CHECK:     ('n_type', 0xe)
+// CHECK:     ('n_sect', 1)
+// CHECK:     ('n_desc', 0)
+// CHECK:     ('n_value', 129)
+// CHECK:     ('_string', '_prev')
+// CHECK:    ),
+// CHECK:   ])
+// CHECK:  ),
+// CHECK:   # Load Command 2
+// CHECK:  (('command', 11)
+// CHECK:   ('size', 80)
+// CHECK:   ('ilocalsym', 0)
+// CHECK:   ('nlocalsym', 4)
+// CHECK:   ('iextdefsym', 4)
+// CHECK:   ('nextdefsym', 0)
+// CHECK:   ('iundefsym', 4)
+// CHECK:   ('nundefsym', 0)
+// CHECK:   ('tocoff', 0)
+// CHECK:   ('ntoc', 0)
+// CHECK:   ('modtaboff', 0)
+// CHECK:   ('nmodtab', 0)
+// CHECK:   ('extrefsymoff', 0)
+// CHECK:   ('nextrefsyms', 0)
+// CHECK:   ('indirectsymoff', 0)
+// CHECK:   ('nindirectsyms', 0)
+// CHECK:   ('extreloff', 0)
+// CHECK:   ('nextrel', 0)
+// CHECK:   ('locreloff', 0)
+// CHECK:   ('nlocrel', 0)
+// CHECK:   ('_indirect_symbols', [
+// CHECK:   ])
+// CHECK:  ),
+// CHECK: ])
diff --git a/test/MC/MachO/reloc.s b/test/MC/MachO/reloc.s
index e86ed8c..c305eeb 100644
--- a/test/MC/MachO/reloc.s
+++ b/test/MC/MachO/reloc.s
@@ -10,7 +10,7 @@ local_a_ext:
 
 local_a:
         .long 0
-local_a_elt:      
+local_a_elt:
         .long 0
 local_b:
         .long local_b - local_c + 245
@@ -27,9 +27,20 @@ local_c:
         .const
 
         .long
-bar:    
+bar:
         .long local_a_elt - bar + 33
 
+L0:
+        .long L0
+        .long L1
+
+        .text
+_f0:
+L1:
+        jmp L0
+        jmp L1
+        ret
+
 // CHECK: ('cputype', 7)
 // CHECK: ('cpusubtype', 3)
 // CHECK: ('filetype', 1)
@@ -42,9 +53,9 @@ bar:
 // CHECK:   ('size', 260)
 // CHECK:   ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
 // CHECK:   ('vm_addr', 0)
-// CHECK:   ('vm_size', 47)
+// CHECK:   ('vm_size', 63)
 // CHECK:   ('file_offset', 392)
-// CHECK:   ('file_size', 47)
+// CHECK:   ('file_size', 63)
 // CHECK:   ('maxprot', 7)
 // CHECK:   ('initprot', 7)
 // CHECK:   ('num_sections', 3)
@@ -54,26 +65,29 @@ bar:
 // CHECK:    (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
 // CHECK:     ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
 // CHECK:     ('address', 0)
-// CHECK:     ('size', 0)
+// CHECK:     ('size', 8)
 // CHECK:     ('offset', 392)
 // CHECK:     ('alignment', 0)
-// CHECK:     ('reloc_offset', 0)
-// CHECK:     ('num_reloc', 0)
-// CHECK:     ('flags', 0x80000000)
+// CHECK:     ('reloc_offset', 456)
+// CHECK:     ('num_reloc', 1)
+// CHECK:     ('flags', 0x80000400)
 // CHECK:     ('reserved1', 0)
 // CHECK:     ('reserved2', 0)
 // CHECK:    ),
 // CHECK:   ('_relocations', [
+// CHECK:     # Relocation 0
+// CHECK:     (('word-0', 0x1),
+// CHECK:      ('word-1', 0x5000003)),
 // CHECK:   ])
-// CHECK:   ('_section_data', '')
+// CHECK:   ('_section_data', '\xe92\x00\x00\x00\xeb\xf9\xc3')
 // CHECK:     # Section 1
 // CHECK:    (('section_name', '__data\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
 // CHECK:     ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK:     ('address', 0)
+// CHECK:     ('address', 8)
 // CHECK:     ('size', 43)
-// CHECK:     ('offset', 392)
+// CHECK:     ('offset', 400)
 // CHECK:     ('alignment', 0)
-// CHECK:     ('reloc_offset', 440)
+// CHECK:     ('reloc_offset', 464)
 // CHECK:     ('num_reloc', 9)
 // CHECK:     ('flags', 0x0)
 // CHECK:     ('reserved1', 0)
@@ -82,72 +96,78 @@ bar:
 // CHECK:   ('_relocations', [
 // CHECK:     # Relocation 0
 // CHECK:     (('word-0', 0x8000002a),
-// CHECK:      ('word-1', 0x10)),
+// CHECK:      ('word-1', 0x18)),
 // CHECK:     # Relocation 1
 // CHECK:     (('word-0', 0x90000028),
-// CHECK:      ('word-1', 0x10)),
+// CHECK:      ('word-1', 0x18)),
 // CHECK:     # Relocation 2
 // CHECK:     (('word-0', 0xa0000024),
-// CHECK:      ('word-1', 0x10)),
+// CHECK:      ('word-1', 0x18)),
 // CHECK:     # Relocation 3
 // CHECK:     (('word-0', 0xa0000020),
-// CHECK:      ('word-1', 0x10)),
+// CHECK:      ('word-1', 0x18)),
 // CHECK:     # Relocation 4
 // CHECK:     (('word-0', 0xa4000014),
-// CHECK:      ('word-1', 0x14)),
+// CHECK:      ('word-1', 0x1c)),
 // CHECK:     # Relocation 5
 // CHECK:     (('word-0', 0xa1000000),
-// CHECK:      ('word-1', 0x1c)),
+// CHECK:      ('word-1', 0x24)),
 // CHECK:     # Relocation 6
 // CHECK:     (('word-0', 0x8),
 // CHECK:      ('word-1', 0x4000002)),
 // CHECK:     # Relocation 7
 // CHECK:     (('word-0', 0x4),
-// CHECK:      ('word-1', 0xc000006)),
+// CHECK:      ('word-1', 0xc000007)),
 // CHECK:     # Relocation 8
 // CHECK:     (('word-0', 0x0),
-// CHECK:      ('word-1', 0xc000006)),
+// CHECK:      ('word-1', 0xc000007)),
 // CHECK:   ])
-// CHECK:   ('_section_data', '\x00\x00\x00\x00\x04\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xed\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x11\x00\x00\x00\x1a\x00\x00\x00$\x00i')
+// CHECK:   ('_section_data', '\x00\x00\x00\x00\x04\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xed\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x19\x00\x00\x00"\x00\x00\x00,\x00q')
 // CHECK:     # Section 2
 // CHECK:    (('section_name', '__const\x00\x00\x00\x00\x00\x00\x00\x00\x00')
 // CHECK:     ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK:     ('address', 43)
-// CHECK:     ('size', 4)
-// CHECK:     ('offset', 435)
+// CHECK:     ('address', 51)
+// CHECK:     ('size', 12)
+// CHECK:     ('offset', 443)
 // CHECK:     ('alignment', 0)
-// CHECK:     ('reloc_offset', 512)
-// CHECK:     ('num_reloc', 2)
+// CHECK:     ('reloc_offset', 536)
+// CHECK:     ('num_reloc', 4)
 // CHECK:     ('flags', 0x0)
 // CHECK:     ('reserved1', 0)
 // CHECK:     ('reserved2', 0)
 // CHECK:    ),
 // CHECK:   ('_relocations', [
 // CHECK:     # Relocation 0
-// CHECK:     (('word-0', 0xa4000000),
-// CHECK:      ('word-1', 0x10)),
+// CHECK:     (('word-0', 0x8),
+// CHECK:      ('word-1', 0x4000001)),
 // CHECK:     # Relocation 1
+// CHECK:     (('word-0', 0x4),
+// CHECK:      ('word-1', 0x4000003)),
+// CHECK:     # Relocation 2
+// CHECK:     (('word-0', 0xa4000000),
+// CHECK:      ('word-1', 0x18)),
+// CHECK:     # Relocation 3
 // CHECK:     (('word-0', 0xa1000000),
-// CHECK:      ('word-1', 0x2b)),
+// CHECK:      ('word-1', 0x33)),
 // CHECK:   ])
-// CHECK:   ('_section_data', '\x06\x00\x00\x00')
+// CHECK:   ('_section_data', '\x06\x00\x00\x007\x00\x00\x00\x00\x00\x00\x00')
 // CHECK:   ])
 // CHECK:  ),
 // CHECK:   # Load Command 1
 // CHECK:  (('command', 2)
 // CHECK:   ('size', 24)
-// CHECK:   ('symoff', 528)
-// CHECK:   ('nsyms', 7)
-// CHECK:   ('stroff', 612)
-// CHECK:   ('strsize', 60)
-// CHECK:   ('_string_data', '\x00undef\x00local_a_ext\x00local_a\x00local_a_elt\x00local_b\x00local_c\x00bar\x00\x00')
+// CHECK:   ('symoff', 568)
+// CHECK:   ('nsyms', 8)
+// CHECK:   ('stroff', 664)
+// CHECK:   ('strsize', 64)
+// CHECK:   ('_string_data', '\x00undef\x00local_a_ext\x00local_a\x00local_a_elt\x00local_b\x00local_c\x00bar\x00_f0\x00\x00')
 // CHECK:   ('_symbols', [
 // CHECK:     # Symbol 0
 // CHECK:    (('n_strx', 19)
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 2)
 // CHECK:     ('n_desc', 0)
-// CHECK:     ('n_value', 12)
+// CHECK:     ('n_value', 20)
 // CHECK:     ('_string', 'local_a')
 // CHECK:    ),
 // CHECK:     # Symbol 1
@@ -155,7 +175,7 @@ bar:
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 2)
 // CHECK:     ('n_desc', 0)
-// CHECK:     ('n_value', 16)
+// CHECK:     ('n_value', 24)
 // CHECK:     ('_string', 'local_a_elt')
 // CHECK:    ),
 // CHECK:     # Symbol 2
@@ -163,7 +183,7 @@ bar:
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 2)
 // CHECK:     ('n_desc', 0)
-// CHECK:     ('n_value', 20)
+// CHECK:     ('n_value', 28)
 // CHECK:     ('_string', 'local_b')
 // CHECK:    ),
 // CHECK:     # Symbol 3
@@ -171,7 +191,7 @@ bar:
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 2)
 // CHECK:     ('n_desc', 0)
-// CHECK:     ('n_value', 28)
+// CHECK:     ('n_value', 36)
 // CHECK:     ('_string', 'local_c')
 // CHECK:    ),
 // CHECK:     # Symbol 4
@@ -179,18 +199,26 @@ bar:
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 3)
 // CHECK:     ('n_desc', 0)
-// CHECK:     ('n_value', 43)
+// CHECK:     ('n_value', 51)
 // CHECK:     ('_string', 'bar')
 // CHECK:    ),
 // CHECK:     # Symbol 5
+// CHECK:    (('n_strx', 59)
+// CHECK:     ('n_type', 0xe)
+// CHECK:     ('n_sect', 1)
+// CHECK:     ('n_desc', 0)
+// CHECK:     ('n_value', 0)
+// CHECK:     ('_string', '_f0')
+// CHECK:    ),
+// CHECK:     # Symbol 6
 // CHECK:    (('n_strx', 7)
 // CHECK:     ('n_type', 0xf)
 // CHECK:     ('n_sect', 2)
 // CHECK:     ('n_desc', 0)
-// CHECK:     ('n_value', 8)
+// CHECK:     ('n_value', 16)
 // CHECK:     ('_string', 'local_a_ext')
 // CHECK:    ),
-// CHECK:     # Symbol 6
+// CHECK:     # Symbol 7
 // CHECK:    (('n_strx', 1)
 // CHECK:     ('n_type', 0x1)
 // CHECK:     ('n_sect', 0)
@@ -204,10 +232,10 @@ bar:
 // CHECK:  (('command', 11)
 // CHECK:   ('size', 80)
 // CHECK:   ('ilocalsym', 0)
-// CHECK:   ('nlocalsym', 5)
-// CHECK:   ('iextdefsym', 5)
+// CHECK:   ('nlocalsym', 6)
+// CHECK:   ('iextdefsym', 6)
 // CHECK:   ('nextdefsym', 1)
-// CHECK:   ('iundefsym', 6)
+// CHECK:   ('iundefsym', 7)
 // CHECK:   ('nundefsym', 1)
 // CHECK:   ('tocoff', 0)
 // CHECK:   ('ntoc', 0)
diff --git a/test/MC/MachO/Darwin/optimal_nop.s b/test/MC/MachO/x86_32-optimal_nop.s
index 29cb073..d21d143 100644
--- a/test/MC/MachO/Darwin/optimal_nop.s
+++ b/test/MC/MachO/x86_32-optimal_nop.s
@@ -1,9 +1,4 @@
-// Validate that we can assemble this file exactly like the platform
-// assembler.
-//
-// RUN: llvm-mc -filetype=obj -triple i386-apple-darwin10 -o %t.mc.o %s
-// RUN: as -arch i386 -o %t.as.o %s
-// RUN: diff %t.mc.o %t.as.o
+// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | macho-dump --dump-section-data | FileCheck %s
 
 # 1 byte nop test
         .align 4, 0 # start with 16 byte alignment filled with zeros
@@ -154,3 +149,43 @@
         # 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00
         .align 4, 0x90
         ret
+
+// CHECK: ('cputype', 7)
+// CHECK: ('cpusubtype', 3)
+// CHECK: ('filetype', 1)
+// CHECK: ('num_load_commands', 1)
+// CHECK: ('load_commands_size', 124)
+// CHECK: ('flag', 0)
+// CHECK: ('load_commands', [
+// CHECK:   # Load Command 0
+// CHECK:  (('command', 1)
+// CHECK:   ('size', 124)
+// CHECK:   ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK:   ('vm_addr', 0)
+// CHECK:   ('vm_size', 337)
+// CHECK:   ('file_offset', 152)
+// CHECK:   ('file_size', 337)
+// CHECK:   ('maxprot', 7)
+// CHECK:   ('initprot', 7)
+// CHECK:   ('num_sections', 1)
+// CHECK:   ('flags', 0)
+// CHECK:   ('sections', [
+// CHECK:     # Section 0
+// CHECK:    (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK:     ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK:     ('address', 0)
+// CHECK:     ('size', 337)
+// CHECK:     ('offset', 152)
+// CHECK:     ('alignment', 4)
+// CHECK:     ('reloc_offset', 0)
+// CHECK:     ('num_reloc', 0)
+// CHECK:     ('flags', 0x80000400)
+// CHECK:     ('reserved1', 0)
+// CHECK:     ('reserved2', 0)
+// CHECK:    ),
+// CHECK:   ('_relocations', [
+// CHECK:   ])
+// CHECK:   ('_section_data', '\xc3\x90\xc3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc3\xc3f\x90\xc3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc3\x0f\x1f\x00\xc3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc3\xc3\xc3\xc3\x0f\x1f@\x00\xc3\x00\x00\x00\x00\x00\x00\x00\xc3\xc3\xc3\x0f\x1fD\x00\x00\xc3\x00\x00\x00\x00\x00\x00\x00\xc3\xc3f\x0f\x1fD\x00\x00\xc3\x00\x00\x00\x00\x00\x00\x00\xc3\x0f\x1f\x80\x00\x00\x00\x00\xc3\x00\x00\x00\x00\x00\x00\x00\xc3\xc3\xc3\xc3\xc3\xc3\xc3\xc3\xc3\x00\x00\x00\x00\x00\x00\x00\xc3\xc3\xc3\xc3\xc3\xc3\xc3f\x0f\x1f\x84\x00\x00\x00\x00\x00\xc3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc3\xc3\xc3\xc3\xc3\xc3\xc3f\x0f\x1f\x84\x00\x00\x00\x00\x00\xc3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc3\xc3\xc3\xc3\xc3\x0f\x1fD\x00\x00f\x0f\x1fD\x00\x00\xc3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc3\xc3\xc3\xc3f\x0f\x1fD\x00\x00f\x0f\x1fD\x00\x00\xc3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc3\xc3\xc3f\x0f\x1fD\x00\x00\x0f\x1f\x80\x00\x00\x00\x00\xc3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc3\xc3\x0f\x1f\x80\x00\x00\x00\x00\x0f\x1f\x80\x00\x00\x00\x00\xc3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc3\x0f\x1f\x80\x00\x00\x00\x00\x0f\x1f\x84\x00\x00\x00\x00\x00\xc3')
+// CHECK:   ])
+// CHECK:  ),
+// CHECK: ])
diff --git a/test/TableGen/2003-08-03-PassCode.td b/test/TableGen/2003-08-03-PassCode.td
index 7142186..c02f499 100644
--- a/test/TableGen/2003-08-03-PassCode.td
+++ b/test/TableGen/2003-08-03-PassCode.td
@@ -1,4 +1,5 @@
 // RUN: tblgen %s
+// XFAIL: vg_leak
 
 class test<code C> {
   code Code = C;
diff --git a/test/TableGen/2006-09-18-LargeInt.td b/test/TableGen/2006-09-18-LargeInt.td
index afd813f..194699a 100644
--- a/test/TableGen/2006-09-18-LargeInt.td
+++ b/test/TableGen/2006-09-18-LargeInt.td
@@ -1,4 +1,5 @@
 // RUN: tblgen %s | grep -- 4294901760
+// XFAIL: vg_leak
 
 def X {
   int Y = 0xFFFF0000;
diff --git a/test/TableGen/AnonDefinitionOnDemand.td b/test/TableGen/AnonDefinitionOnDemand.td
index d567fc8..b10ad58 100644
--- a/test/TableGen/AnonDefinitionOnDemand.td
+++ b/test/TableGen/AnonDefinitionOnDemand.td
@@ -1,4 +1,5 @@
 // RUN: tblgen < %s
+// XFAIL: vg_leak
 
 class foo<int X> { int THEVAL = X; }
 def foo_imp : foo<1>;
diff --git a/test/TableGen/DagDefSubst.td b/test/TableGen/DagDefSubst.td
index e5eebe9..92a207f 100644
--- a/test/TableGen/DagDefSubst.td
+++ b/test/TableGen/DagDefSubst.td
@@ -1,5 +1,6 @@
 // RUN: tblgen %s | grep {dag d = (X Y)}
 // RUN: tblgen %s | grep {dag e = (Y X)}
+// XFAIL: vg_leak
 def X;
 
 class yclass;
diff --git a/test/TableGen/DagIntSubst.td b/test/TableGen/DagIntSubst.td
index 3c1291c..00fde69 100644
--- a/test/TableGen/DagIntSubst.td
+++ b/test/TableGen/DagIntSubst.td
@@ -1,4 +1,5 @@
 // RUN: tblgen %s | grep {dag d = (X 13)}
+// XFAIL: vg_leak
 def X;
 
 class C<int N> {
diff --git a/test/TableGen/DefmInherit.td b/test/TableGen/DefmInherit.td
index 4f37edf..9e16670 100644
--- a/test/TableGen/DefmInherit.td
+++ b/test/TableGen/DefmInherit.td
@@ -1,4 +1,5 @@
 // RUN: tblgen %s | grep {zing = 4} | count 4
+// XFAIL: vg_leak
 
 class C1<int A, string B> { 
   int bar = A;
diff --git a/test/TableGen/ForwardRef.td b/test/TableGen/ForwardRef.td
index 2056b1f..955cc14 100644
--- a/test/TableGen/ForwardRef.td
+++ b/test/TableGen/ForwardRef.td
@@ -1,4 +1,5 @@
 // RUN: tblgen %s -o -
+// XFAIL: vg_leak
 
 class bar {
   list<bar> x;
diff --git a/test/TableGen/GeneralList.td b/test/TableGen/GeneralList.td
index 7f099f2..ca92a21 100644
--- a/test/TableGen/GeneralList.td
+++ b/test/TableGen/GeneralList.td
@@ -1,4 +1,5 @@
 // RUN: tblgen %s
+// XFAIL: vg_leak
 //
 // Test to make sure that lists work with any data-type
 
diff --git a/test/TableGen/IntBitInit.td b/test/TableGen/IntBitInit.td
index b949bfe..16ac9c8 100644
--- a/test/TableGen/IntBitInit.td
+++ b/test/TableGen/IntBitInit.td
@@ -1,4 +1,5 @@
 // RUN: tblgen %s
+// XFAIL: vg_leak
 def {
   bit A = 1;
   int B = A;
diff --git a/test/TableGen/LazyChange.td b/test/TableGen/LazyChange.td
index 145fd0b..fa53562 100644
--- a/test/TableGen/LazyChange.td
+++ b/test/TableGen/LazyChange.td
@@ -1,5 +1,5 @@
 // RUN: tblgen %s | grep {int Y = 3}
-
+// XFAIL: vg_leak
 
 class C {
   int X = 4;
diff --git a/test/TableGen/ListArgs.td b/test/TableGen/ListArgs.td
index daa0de6..a513db6 100644
--- a/test/TableGen/ListArgs.td
+++ b/test/TableGen/ListArgs.td
@@ -1,4 +1,5 @@
 // RUN: tblgen %s
+// XFAIL: vg_leak
 
 class B<list<int> v> {
   list<int> vals = v;
diff --git a/test/TableGen/ListArgsSimple.td b/test/TableGen/ListArgsSimple.td
index b3b2078..f7caed6 100644
--- a/test/TableGen/ListArgsSimple.td
+++ b/test/TableGen/ListArgsSimple.td
@@ -1,4 +1,5 @@
 // RUN: tblgen %s
+// XFAIL: vg_leak
 
 class B<int v> {
   int val = v;
diff --git a/test/TableGen/ListConversion.td b/test/TableGen/ListConversion.td
index 773ed6e..222b614 100644
--- a/test/TableGen/ListConversion.td
+++ b/test/TableGen/ListConversion.td
@@ -1,4 +1,5 @@
 // RUN: tblgen %s
+// XFAIL: vg_leak
 class A;
 class B : A;
 
diff --git a/test/TableGen/ListSlices.td b/test/TableGen/ListSlices.td
index be794cf..5848a4e 100644
--- a/test/TableGen/ListSlices.td
+++ b/test/TableGen/ListSlices.td
@@ -1,4 +1,5 @@
 // RUN: tblgen %s
+// XFAIL: vg_leak
 
 def A {
   list<int> B = [10, 20, 30, 4, 1, 1231, 20];
diff --git a/test/TableGen/MultiClass.td b/test/TableGen/MultiClass.td
index 52ba59c..9f92b73 100644
--- a/test/TableGen/MultiClass.td
+++ b/test/TableGen/MultiClass.td
@@ -1,4 +1,5 @@
 // RUN: tblgen %s | grep {zing = 4} | count 2
+// XFAIL: vg_leak
 
 class C1<int A, string B> { 
   int bar = A;
diff --git a/test/TableGen/MultiClassDefName.td b/test/TableGen/MultiClassDefName.td
index 2e71f7d..138c93d 100644
--- a/test/TableGen/MultiClassDefName.td
+++ b/test/TableGen/MultiClassDefName.td
@@ -1,4 +1,5 @@
 // RUN: tblgen %s | grep WorldHelloCC | count 1
+// XFAIL: vg_leak
 
 class C<string n> {
   string name = n;
diff --git a/test/TableGen/MultiClassInherit.td b/test/TableGen/MultiClassInherit.td
index d4c4ce5..9da80ba 100644
--- a/test/TableGen/MultiClassInherit.td
+++ b/test/TableGen/MultiClassInherit.td
@@ -1,4 +1,5 @@
 // RUN: tblgen %s | grep {zing = 4} | count 28
+// XFAIL: vg_leak
 
 class C1<int A, string B> { 
   int bar = A;
diff --git a/test/TableGen/Slice.td b/test/TableGen/Slice.td
index cd9c6da..22bf7fb 100644
--- a/test/TableGen/Slice.td
+++ b/test/TableGen/Slice.td
@@ -1,5 +1,6 @@
 // RUN: tblgen %s | grep {\\\[(set} | count 2
 // RUN: tblgen %s | grep {\\\[\\\]} | count 2
+// XFAIL: vg_leak
 
 class ValueType<int size, int value> {
   int Size = size;
diff --git a/test/TableGen/String.td b/test/TableGen/String.td
index d2ae451..fc0f5b8 100644
--- a/test/TableGen/String.td
+++ b/test/TableGen/String.td
@@ -1,4 +1,5 @@
 // RUN: tblgen %s 
+// XFAIL: vg_leak
 class x {
   string y = "missing terminating '\"' character";
 }
diff --git a/test/TableGen/SuperSubclassSameName.td b/test/TableGen/SuperSubclassSameName.td
index 087df87..304c883 100644
--- a/test/TableGen/SuperSubclassSameName.td
+++ b/test/TableGen/SuperSubclassSameName.td
@@ -1,4 +1,5 @@
 // RUN: tblgen < %s
+// XFAIL: vg_leak
 // Test for template arguments that have the same name as superclass template
 // arguments.
 
diff --git a/test/TableGen/TargetInstrInfo.td b/test/TableGen/TargetInstrInfo.td
index 8299541..2871eb8 100644
--- a/test/TableGen/TargetInstrInfo.td
+++ b/test/TableGen/TargetInstrInfo.td
@@ -1,6 +1,7 @@
 // This test describes how we eventually want to describe instructions in
 // the target independent code generators.
 // RUN: tblgen %s
+// XFAIL: vg_leak
 
 // Target indep stuff.
 class Instruction {   // Would have other stuff eventually
diff --git a/test/TableGen/TargetInstrSpec.td b/test/TableGen/TargetInstrSpec.td
index 7c3dd57..a7ca902 100644
--- a/test/TableGen/TargetInstrSpec.td
+++ b/test/TableGen/TargetInstrSpec.td
@@ -1,5 +1,6 @@
 // RUN: tblgen %s | grep {\\\[(set VR128:\$dst, (int_x86_sse2_add_pd VR128:\$src1, VR128:\$src2))\\\]} | count 1
 // RUN: tblgen %s | grep {\\\[(set VR128:\$dst, (int_x86_sse2_add_ps VR128:\$src1, VR128:\$src2))\\\]} | count 1
+// XFAIL: vg_leak
 
 class ValueType<int size, int value> {
   int Size = size;
diff --git a/test/TableGen/TemplateArgRename.td b/test/TableGen/TemplateArgRename.td
index 535c2e4..ee5d2cf 100644
--- a/test/TableGen/TemplateArgRename.td
+++ b/test/TableGen/TemplateArgRename.td
@@ -1,4 +1,5 @@
 // RUN: tblgen %s
+// XFAIL: vg_leak
 
 // Make sure there is no collision between XX and XX.
 def S;
diff --git a/test/TableGen/Tree.td b/test/TableGen/Tree.td
index f9f1f15..2796cfd 100644
--- a/test/TableGen/Tree.td
+++ b/test/TableGen/Tree.td
@@ -1,5 +1,6 @@
 // This tests to make sure we can parse tree patterns.
 // RUN: tblgen %s
+// XFAIL: vg_leak
 
 class TreeNode;
 class RegisterClass;
diff --git a/test/TableGen/TreeNames.td b/test/TableGen/TreeNames.td
index 05a3298..ccdeb88 100644
--- a/test/TableGen/TreeNames.td
+++ b/test/TableGen/TreeNames.td
@@ -1,5 +1,6 @@
 // This tests to make sure we can parse tree patterns with names.
 // RUN: tblgen %s
+// XFAIL: vg_leak
 
 class TreeNode;
 class RegisterClass;
diff --git a/test/TableGen/UnsetBitInit.td b/test/TableGen/UnsetBitInit.td
index 91342ec..ff70108 100644
--- a/test/TableGen/UnsetBitInit.td
+++ b/test/TableGen/UnsetBitInit.td
@@ -1,4 +1,5 @@
 // RUN: tblgen %s
+// XFAIL: vg_leak
 class x {
   field bits<32> A;
 }
diff --git a/test/TableGen/cast.td b/test/TableGen/cast.td
index 4a771ae..8164e74 100644
--- a/test/TableGen/cast.td
+++ b/test/TableGen/cast.td
@@ -1,4 +1,5 @@
 // RUN: tblgen %s | grep {add_ps} | count 3
+// XFAIL: vg_leak
 
 class ValueType<int size, int value> {
   int Size = size;
diff --git a/test/TableGen/eq.td b/test/TableGen/eq.td
index 8ba6d7e..518a80a 100644
--- a/test/TableGen/eq.td
+++ b/test/TableGen/eq.td
@@ -1,4 +1,5 @@
 // RUN: tblgen %s | FileCheck %s
+// XFAIL: vg_leak
 // CHECK: Value = 0
 // CHECK: Value = 1
 
diff --git a/test/TableGen/foreach.td b/test/TableGen/foreach.td
index acce449..d4d81f8 100644
--- a/test/TableGen/foreach.td
+++ b/test/TableGen/foreach.td
@@ -1,6 +1,7 @@
 // RUN: tblgen %s | grep {Jr} | count 2
 // RUN: tblgen %s | grep {Sr} | count 2
 // RUN: tblgen %s | grep {NAME} | count 1
+// XFAIL: vg_leak
 
 // Variables for foreach
 class decls {
diff --git a/test/TableGen/if.td b/test/TableGen/if.td
index 9b24382..0bac0ba 100644
--- a/test/TableGen/if.td
+++ b/test/TableGen/if.td
@@ -1,5 +1,6 @@
 // RUN: tblgen %s | grep {\\\[1, 2, 3\\\]} | count 4
 // RUN: tblgen %s | grep {\\\[4, 5, 6\\\]} | count 2
+// XFAIL: vg_leak
 
 class A<list<list<int>> vals> {
   list<int> first = vals[0];
diff --git a/test/TableGen/lisp.td b/test/TableGen/lisp.td
index 3e392fd..b521e04 100644
--- a/test/TableGen/lisp.td
+++ b/test/TableGen/lisp.td
@@ -1,4 +1,5 @@
 // RUN: tblgen %s | grep {}
+// XFAIL: vg_leak
 
 class List<list<string> n> {
   list<string> names = n;
diff --git a/test/TableGen/nameconcat.td b/test/TableGen/nameconcat.td
index fc865f9..fd2880a 100644
--- a/test/TableGen/nameconcat.td
+++ b/test/TableGen/nameconcat.td
@@ -1,4 +1,5 @@
 // RUN: tblgen %s | grep {add_ps} | count 3
+// XFAIL: vg_leak
 
 class ValueType<int size, int value> {
   int Size = size;
diff --git a/test/TableGen/strconcat.td b/test/TableGen/strconcat.td
index fc0d805..38409a9 100644
--- a/test/TableGen/strconcat.td
+++ b/test/TableGen/strconcat.td
@@ -1,4 +1,5 @@
 // RUN: tblgen %s | grep fufoo
+// XFAIL: vg_leak
 
 class Y<string S> {
   string T = !strconcat(S, "foo");
diff --git a/test/TableGen/subst.td b/test/TableGen/subst.td
index ce9f45d..05d424f 100644
--- a/test/TableGen/subst.td
+++ b/test/TableGen/subst.td
@@ -4,6 +4,7 @@
 // RUN: tblgen %s | grep {LAST} | count 1
 // RUN: tblgen %s | grep {TVAR} | count 2
 // RUN: tblgen %s | grep {Bogus} | count 1
+// XFAIL: vg_leak
 
 class Honorific<string t> {
   string honorific = t;
diff --git a/test/TableGen/subst2.td b/test/TableGen/subst2.td
index 3366c9d..584266e 100644
--- a/test/TableGen/subst2.td
+++ b/test/TableGen/subst2.td
@@ -1,4 +1,5 @@
 // RUN: tblgen %s | FileCheck %s
+// XFAIL: vg_leak
 // CHECK: No subst
 // CHECK: No foo
 // CHECK: RECURSE foo
diff --git a/test/lit.cfg b/test/lit.cfg
index 929871a..fd3120a 100644
--- a/test/lit.cfg
+++ b/test/lit.cfg
@@ -128,6 +128,10 @@ excludes = []
 # Provide target_triple for use in XFAIL and XTARGET.
 config.target_triple = site_exp['target_triplet']
 
+# When running under valgrind, we mangle '-vg' or '-vg_leak' onto the end of the
+# triple so we can check it with XFAIL and XTARGET.
+config.target_triple += lit.valgrindTriple
+
 # Provide llvm_supports_target for use in local configs.
 targets = set(site_exp["TARGETS_TO_BUILD"].split())
 def llvm_supports_target(name):
diff --git a/tools/bugpoint/BugDriver.cpp b/tools/bugpoint/BugDriver.cpp
index abf5d8e..813c96c 100644
--- a/tools/bugpoint/BugDriver.cpp
+++ b/tools/bugpoint/BugDriver.cpp
@@ -68,12 +68,12 @@ std::string llvm::getPassesString(const std::vector<const PassInfo*> &Passes) {
 }
 
 BugDriver::BugDriver(const char *toolname, bool as_child, bool find_bugs,
-                     unsigned timeout, unsigned memlimit,
+                     unsigned timeout, unsigned memlimit, bool use_valgrind,
                      LLVMContext& ctxt)
   : Context(ctxt), ToolName(toolname), ReferenceOutputFile(OutputFile),
     Program(0), Interpreter(0), SafeInterpreter(0), gcc(0),
     run_as_child(as_child), run_find_bugs(find_bugs), Timeout(timeout), 
-    MemoryLimit(memlimit)  {}
+    MemoryLimit(memlimit), UseValgrind(use_valgrind) {}
 
 
 /// ParseInputFile - Given a bitcode or assembly input filename, parse and
diff --git a/tools/bugpoint/BugDriver.h b/tools/bugpoint/BugDriver.h
index db35c85..0a10a61 100644
--- a/tools/bugpoint/BugDriver.h
+++ b/tools/bugpoint/BugDriver.h
@@ -55,6 +55,7 @@ class BugDriver {
   bool run_find_bugs;
   unsigned Timeout;
   unsigned MemoryLimit;
+  bool UseValgrind;
 
   // FIXME: sort out public/private distinctions...
   friend class ReducePassList;
@@ -62,7 +63,8 @@ class BugDriver {
 
 public:
   BugDriver(const char *toolname, bool as_child, bool find_bugs,
-            unsigned timeout, unsigned memlimit, LLVMContext& ctxt);
+            unsigned timeout, unsigned memlimit, bool use_valgrind,
+            LLVMContext& ctxt);
 
   const char *getToolName() const { return ToolName; }
 
diff --git a/tools/bugpoint/OptimizerDriver.cpp b/tools/bugpoint/OptimizerDriver.cpp
index 9f712e0..3a6149b 100644
--- a/tools/bugpoint/OptimizerDriver.cpp
+++ b/tools/bugpoint/OptimizerDriver.cpp
@@ -45,8 +45,6 @@ namespace {
   // ChildOutput - This option captures the name of the child output file that
   // is set up by the parent bugpoint process
   cl::opt<std::string> ChildOutput("child-output", cl::ReallyHidden);
-  cl::opt<bool> UseValgrind("enable-valgrind",
-                            cl::desc("Run optimizations through valgrind"));
 }
 
 /// writeProgramToFile - This writes the current "Program" to the named bitcode
diff --git a/tools/bugpoint/bugpoint.cpp b/tools/bugpoint/bugpoint.cpp
index 565f3f9..e14f31e 100644
--- a/tools/bugpoint/bugpoint.cpp
+++ b/tools/bugpoint/bugpoint.cpp
@@ -25,6 +25,7 @@
 #include "llvm/Support/StandardPasses.h"
 #include "llvm/System/Process.h"
 #include "llvm/System/Signals.h"
+#include "llvm/System/Valgrind.h"
 #include "llvm/LinkAllVMCore.h"
 using namespace llvm;
 
@@ -48,9 +49,14 @@ TimeoutValue("timeout", cl::init(300), cl::value_desc("seconds"),
              cl::desc("Number of seconds program is allowed to run before it "
                       "is killed (default is 300s), 0 disables timeout"));
 
-static cl::opt<unsigned>
-MemoryLimit("mlimit", cl::init(100), cl::value_desc("MBytes"),
-             cl::desc("Maximum amount of memory to use. 0 disables check."));
+static cl::opt<int>
+MemoryLimit("mlimit", cl::init(-1), cl::value_desc("MBytes"),
+             cl::desc("Maximum amount of memory to use. 0 disables check."
+                      " Defaults to 100MB (800MB under valgrind)."));
+
+static cl::opt<bool>
+UseValgrind("enable-valgrind",
+            cl::desc("Run optimizations through valgrind"));
 
 // The AnalysesList is automatically populated with registered Passes by the
 // PassNameParser.
@@ -108,7 +114,17 @@ int main(int argc, char **argv) {
     outs() << "Override triple set to '" << OverrideTriple << "'\n";
   }
 
-  BugDriver D(argv[0], AsChild, FindBugs, TimeoutValue, MemoryLimit, Context);
+  if (MemoryLimit < 0) {
+    // Set the default MemoryLimit.  Be sure to update the flag's description if
+    // you change this.
+    if (sys::RunningOnValgrind() || UseValgrind)
+      MemoryLimit = 800;
+    else
+      MemoryLimit = 100;
+  }
+
+  BugDriver D(argv[0], AsChild, FindBugs, TimeoutValue, MemoryLimit,
+              UseValgrind, Context);
   if (D.addSources(InputFilenames)) return 1;
   
   AddToDriver PM(D);
diff --git a/tools/edis/Makefile b/tools/edis/Makefile
index a3c5879..cd8f4b0 100644
--- a/tools/edis/Makefile
+++ b/tools/edis/Makefile
@@ -39,11 +39,12 @@ ifeq ($(HOST_OS),Darwin)
                          -Wl,-seg1addr -Wl,0xE0000000 
 
     # Mac OS X 10.4 and earlier tools do not allow a second -install_name on command line
+    # Path is /Developer/usr/local/lib for now; will use an rpath-based mechanism soon
     DARWIN_VERS := $(shell echo $(TARGET_TRIPLE) | sed 's/.*darwin\([0-9]*\).*/\1/')
     ifneq ($(DARWIN_VERS),8)
        LLVMLibsOptions    := $(LLVMLibsOptions)  \
                             -no-undefined -Wl,-install_name \
-                            -Wl,"@executable_path/../lib/lib$(LIBRARYNAME)$(SHLIBEXT)"
+                            -Wl,"/Developer/usr/local/lib/lib$(LIBRARYNAME)$(SHLIBEXT)"
     endif
 endif
 
diff --git a/tools/llvm-mc/Disassembler.cpp b/tools/llvm-mc/Disassembler.cpp
index dbfe7a5..0caf539 100644
--- a/tools/llvm-mc/Disassembler.cpp
+++ b/tools/llvm-mc/Disassembler.cpp
@@ -47,8 +47,8 @@ public:
 };
 }
 
-static bool PrintInsts(const llvm::MCDisassembler &DisAsm,
-                      llvm::MCInstPrinter &Printer, const ByteArrayTy &Bytes,
+static bool PrintInsts(const MCDisassembler &DisAsm,
+                      MCInstPrinter &Printer, const ByteArrayTy &Bytes,
                       SourceMgr &SM) {
   // Wrap the vector in a MemoryObject.
   VectorMemoryObject memoryObject(Bytes);
@@ -77,24 +77,23 @@ static bool PrintInsts(const llvm::MCDisassembler &DisAsm,
 }
 
 int Disassembler::disassemble(const Target &T, const std::string &Triple,
-                                 MemoryBuffer &Buffer) {
+                              MemoryBuffer &Buffer) {
   // Set up disassembler.
-  llvm::OwningPtr<const llvm::MCAsmInfo> AsmInfo(T.createAsmInfo(Triple));
+  OwningPtr<const MCAsmInfo> AsmInfo(T.createAsmInfo(Triple));
   
   if (!AsmInfo) {
     errs() << "error: no assembly info for target " << Triple << "\n";
     return -1;
   }
   
-  llvm::OwningPtr<const llvm::MCDisassembler> DisAsm(T.createMCDisassembler());
+  OwningPtr<const MCDisassembler> DisAsm(T.createMCDisassembler());
   if (!DisAsm) {
     errs() << "error: no disassembler for target " << Triple << "\n";
     return -1;
   }
   
-  llvm::MCInstPrinter *InstPrinter = T.createMCInstPrinter(0, *AsmInfo, outs());
-  
-  if (!InstPrinter) {
+  OwningPtr<MCInstPrinter> IP(T.createMCInstPrinter(0, *AsmInfo, outs()));
+  if (!IP) {
     errs() << "error: no instruction printer for target " << Triple << '\n';
     return -1;
   }
@@ -151,7 +150,7 @@ int Disassembler::disassemble(const Target &T, const std::string &Triple,
   }
   
   if (!ByteArray.empty())
-    ErrorOccurred |= PrintInsts(*DisAsm, *InstPrinter, ByteArray, SM);
+    ErrorOccurred |= PrintInsts(*DisAsm, *IP, ByteArray, SM);
     
   return ErrorOccurred;
 }
diff --git a/tools/llvm-mc/llvm-mc.cpp b/tools/llvm-mc/llvm-mc.cpp
index 66e1260..3c23990 100644
--- a/tools/llvm-mc/llvm-mc.cpp
+++ b/tools/llvm-mc/llvm-mc.cpp
@@ -152,7 +152,7 @@ static int AsLexInput(const char *ProgName) {
   if (!TheTarget)
     return 1;
 
-  const MCAsmInfo *MAI = TheTarget->createAsmInfo(TripleName);
+  llvm::OwningPtr<MCAsmInfo> MAI((MCAsmInfo*) TheTarget->createAsmInfo(TripleName));
   assert(MAI && "Unable to create target asm info!");
 
   AsmLexer Lexer(*MAI);
@@ -260,7 +260,7 @@ static int AssembleInput(const char *ProgName) {
   SrcMgr.setIncludeDirs(IncludeDirs);
   
   
-  const MCAsmInfo *MAI = TheTarget->createAsmInfo(TripleName);
+  llvm::OwningPtr<MCAsmInfo> MAI((MCAsmInfo*) TheTarget->createAsmInfo(TripleName));
   assert(MAI && "Unable to create target asm info!");
   
   MCContext Ctx(*MAI);
@@ -278,18 +278,17 @@ static int AssembleInput(const char *ProgName) {
     return 1;
   }
 
-  OwningPtr<MCInstPrinter> IP;
   OwningPtr<MCCodeEmitter> CE;
   OwningPtr<MCStreamer> Str;
   OwningPtr<TargetAsmBackend> TAB;
 
   if (FileType == OFT_AssemblyFile) {
-    IP.reset(TheTarget->createMCInstPrinter(OutputAsmVariant, *MAI, *Out));
+    MCInstPrinter *IP =
+      TheTarget->createMCInstPrinter(OutputAsmVariant, *MAI, *Out);
     if (ShowEncoding)
       CE.reset(TheTarget->createCodeEmitter(*TM, Ctx));
     Str.reset(createAsmStreamer(Ctx, *Out,TM->getTargetData()->isLittleEndian(),
-                                /*asmverbose*/true, IP.get(), CE.get(),
-                                ShowInst));
+                                /*asmverbose*/true, IP, CE.get(), ShowInst));
   } else {
     assert(FileType == OFT_ObjectFile && "Invalid file type!");
     CE.reset(TheTarget->createCodeEmitter(*TM, Ctx));
@@ -319,13 +318,9 @@ static int AssembleInput(const char *ProgName) {
 }
 
 static int DisassembleInput(const char *ProgName) {
-  std::string Error;
-  const Target *TheTarget = TargetRegistry::lookupTarget(TripleName, Error);
-  if (TheTarget == 0) {
-    errs() << ProgName << ": error: unable to get target for '" << TripleName
-    << "', see --version and --triple.\n";
+  const Target *TheTarget = GetTarget(ProgName);
+  if (!TheTarget)
     return 0;
-  }
   
   std::string ErrorMessage;
   
diff --git a/unittests/ADT/APFloatTest.cpp b/unittests/ADT/APFloatTest.cpp
index 5719960..964b04d 100644
--- a/unittests/ADT/APFloatTest.cpp
+++ b/unittests/ADT/APFloatTest.cpp
@@ -374,6 +374,7 @@ TEST(APFloatTest, makeNaN) {
 }
 
 #ifdef GTEST_HAS_DEATH_TEST
+#ifndef NDEBUG
 TEST(APFloatTest, SemanticsDeath) {
   EXPECT_DEATH(APFloat(APFloat::IEEEsingle, 0.0f).convertToDouble(), "Float semantics are not IEEEdouble");
   EXPECT_DEATH(APFloat(APFloat::IEEEdouble, 0.0 ).convertToFloat(),  "Float semantics are not IEEEsingle");
@@ -573,5 +574,6 @@ TEST(APFloatTest, StringHexadecimalExponentDeath) {
   EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "-0x1.1p-"), "Exponent has no digits");
 }
 #endif
+#endif
 
 }
diff --git a/unittests/ADT/APIntTest.cpp b/unittests/ADT/APIntTest.cpp
index 0b13aa4..d08e86a 100644
--- a/unittests/ADT/APIntTest.cpp
+++ b/unittests/ADT/APIntTest.cpp
@@ -328,6 +328,7 @@ TEST(APIntTest, Log2) {
 }
 
 #ifdef GTEST_HAS_DEATH_TEST
+#ifndef NDEBUG
 TEST(APIntTest, StringDeath) {
   EXPECT_DEATH(APInt(0, "", 0), "Bitwidth too small");
   EXPECT_DEATH(APInt(32, "", 0), "Invalid string length");
@@ -340,5 +341,6 @@ TEST(APIntTest, StringDeath) {
   EXPECT_DEATH(APInt(32, "1L", 10), "Invalid character in digit string");
 }
 #endif
+#endif
 
 }
diff --git a/unittests/ADT/SmallVectorTest.cpp b/unittests/ADT/SmallVectorTest.cpp
index 8a81796..d7dc3af 100644
--- a/unittests/ADT/SmallVectorTest.cpp
+++ b/unittests/ADT/SmallVectorTest.cpp
@@ -384,7 +384,7 @@ TEST_F(SmallVectorTest, ConstVectorTest) {
 // Direct array access.
 TEST_F(SmallVectorTest, DirectVectorTest) {
   EXPECT_EQ(0u, theVector.size());
-  EXPECT_EQ(4u, theVector.capacity());
+  EXPECT_LE(4u, theVector.capacity());
   EXPECT_EQ(0, Constructable::getNumConstructorCalls());
   theVector.end()[0] = 1;
   theVector.end()[1] = 2;
diff --git a/unittests/Support/LeakDetectorTest.cpp b/unittests/Support/LeakDetectorTest.cpp
index 85ef046..d198c7a 100644
--- a/unittests/Support/LeakDetectorTest.cpp
+++ b/unittests/Support/LeakDetectorTest.cpp
@@ -15,6 +15,7 @@ using namespace llvm;
 namespace {
 
 #ifdef GTEST_HAS_DEATH_TEST
+#ifndef NDEBUG
 TEST(LeakDetector, Death1) {
   LeakDetector::addGarbageObject((void*) 1);
   LeakDetector::addGarbageObject((void*) 2);
@@ -25,5 +26,6 @@ TEST(LeakDetector, Death1) {
                "Cache != o && \"Object already in set!\"");
 }
 #endif
+#endif
 
 }
diff --git a/unittests/VMCore/InstructionsTest.cpp b/unittests/VMCore/InstructionsTest.cpp
index 2d98cad..c1baa74 100644
--- a/unittests/VMCore/InstructionsTest.cpp
+++ b/unittests/VMCore/InstructionsTest.cpp
@@ -8,8 +8,10 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Instructions.h"
+#include "llvm/BasicBlock.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/LLVMContext.h"
+#include "llvm/ADT/STLExtras.h"
 #include "gtest/gtest.h"
 
 namespace llvm {
@@ -20,11 +22,13 @@ TEST(InstructionsTest, ReturnInst) {
 
   // test for PR6589
   const ReturnInst* r0 = ReturnInst::Create(C);
+  EXPECT_EQ(r0->getNumOperands(), 0U);
   EXPECT_EQ(r0->op_begin(), r0->op_end());
 
   const IntegerType* Int1 = IntegerType::get(C, 1);
   Constant* One = ConstantInt::get(Int1, 1, true);
   const ReturnInst* r1 = ReturnInst::Create(C, One);
+  EXPECT_EQ(r1->getNumOperands(), 1U);
   User::const_op_iterator b(r1->op_begin());
   EXPECT_NE(b, r1->op_end());
   EXPECT_EQ(*b, One);
@@ -37,5 +41,88 @@ TEST(InstructionsTest, ReturnInst) {
   delete r1;
 }
 
+TEST(InstructionsTest, BranchInst) {
+  LLVMContext &C(getGlobalContext());
+
+  // Make a BasicBlocks
+  BasicBlock* bb0 = BasicBlock::Create(C);
+  BasicBlock* bb1 = BasicBlock::Create(C);
+
+  // Mandatory BranchInst
+  const BranchInst* b0 = BranchInst::Create(bb0);
+
+  EXPECT_TRUE(b0->isUnconditional());
+  EXPECT_FALSE(b0->isConditional());
+  EXPECT_EQ(b0->getNumSuccessors(), 1U);
+
+  // check num operands
+  EXPECT_EQ(b0->getNumOperands(), 1U);
+
+  EXPECT_NE(b0->op_begin(), b0->op_end());
+  EXPECT_EQ(next(b0->op_begin()), b0->op_end());
+
+  EXPECT_EQ(next(b0->op_begin()), b0->op_end());
+
+  const IntegerType* Int1 = IntegerType::get(C, 1);
+  Constant* One = ConstantInt::get(Int1, 1, true);
+
+  // Conditional BranchInst
+  BranchInst* b1 = BranchInst::Create(bb0, bb1, One);
+
+  EXPECT_FALSE(b1->isUnconditional());
+  EXPECT_TRUE(b1->isConditional());
+  EXPECT_EQ(b1->getNumSuccessors(), 2U);
+
+  // check num operands
+  EXPECT_EQ(b1->getNumOperands(), 3U);
+
+  User::const_op_iterator b(b1->op_begin());
+
+  // check COND
+  EXPECT_NE(b, b1->op_end());
+  EXPECT_EQ(*b, One);
+  EXPECT_EQ(b1->getOperand(0), One);
+  EXPECT_EQ(b1->getCondition(), One);
+  ++b;
+
+  // check ELSE
+  EXPECT_EQ(*b, bb1);
+  EXPECT_EQ(b1->getOperand(1), bb1);
+  EXPECT_EQ(b1->getSuccessor(1), bb1);
+  ++b;
+
+  // check THEN
+  EXPECT_EQ(*b, bb0);
+  EXPECT_EQ(b1->getOperand(2), bb0);
+  EXPECT_EQ(b1->getSuccessor(0), bb0);
+  ++b;
+
+  EXPECT_EQ(b, b1->op_end());
+
+  // shrink it
+  b1->setUnconditionalDest(bb1);
+
+  // check num operands
+  EXPECT_EQ(b1->getNumOperands(), 1U);
+
+  User::const_op_iterator c(b1->op_begin());
+  EXPECT_NE(c, b1->op_end());
+
+  // check THEN
+  EXPECT_EQ(*c, bb1);
+  EXPECT_EQ(b1->getOperand(0), bb1);
+  EXPECT_EQ(b1->getSuccessor(0), bb1);
+  ++c;
+
+  EXPECT_EQ(c, b1->op_end());
+
+  // clean up
+  delete b0;
+  delete b1;
+
+  delete bb0;
+  delete bb1;
+}
+
 }  // end anonymous namespace
 }  // end namespace llvm
diff --git a/utils/FileCheck/FileCheck.cpp b/utils/FileCheck/FileCheck.cpp
index 3c4742c..c6a1392 100644
--- a/utils/FileCheck/FileCheck.cpp
+++ b/utils/FileCheck/FileCheck.cpp
@@ -401,11 +401,12 @@ void Pattern::PrintFailureInfo(const SourceMgr &SM, StringRef Buffer,
     }
   }
 
-  if (Best != StringRef::npos && BestQuality < 50) {
-    // Print the "possible intended match here" line if we found something
-    // reasonable.
-    SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Best),
-                    "possible intended match here", "note");
+  // Print the "possible intended match here" line if we found something
+  // reasonable and not equal to what we showed in the "scanning from here"
+  // line.
+  if (Best && Best != StringRef::npos && BestQuality < 50) {
+      SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Best),
+                      "possible intended match here", "note");
 
     // FIXME: If we wanted to be really friendly we would show why the match
     // failed, as it can be hard to spot simple one character differences.
diff --git a/utils/NewNightlyTest.pl b/utils/NewNightlyTest.pl
index a306382..4287cc1 100755
--- a/utils/NewNightlyTest.pl
+++ b/utils/NewNightlyTest.pl
@@ -24,6 +24,7 @@ use Socket;
 #                   IMPLEMENTED.
 #  -nickname NAME   The NAME argument specifieds the nickname this script
 #                   will submit to the nightlytest results repository.
+#  -nouname         Don't include uname data (machine will be identified by nickname only).
 #  -submit-server   Specifies a server to submit the test results too. If this
 #                   option is not specified it defaults to
 #                   llvm.org. This is basically just the address of the
@@ -220,6 +221,7 @@ while (scalar(@ARGV) and ($_ = $ARGV[0], /^[-+]/)) {
                              $LLVMGCCPATH = $ARGV[0] . '/bin';
                              shift; next;}
   if (/^-noexternals$/)    { $NOEXTERNALS = 1; next; }
+  if (/^-nouname$/)        { $NOUNAME = 1; next; }
   if (/^-use-gmake/)       { $MAKECMD = "gmake"; shift; next; }
   if (/^-extraflags/)      { $CONFIGUREARGS .=
                              " --with-extra-options=\'$ARGV[0]\'"; shift; next;}
@@ -693,12 +695,21 @@ $endtime = `date "+20%y-%m-%d %H:%M:%S"`;
 
 if ( $VERBOSE ) { print "PREPARING LOGS TO BE SENT TO SERVER\n"; }
 
-$machine_data = "uname: ".`uname -a`.
-                "hardware: ".`uname -m`.
-                "os: ".`uname -sr`.
-                "name: ".`uname -n`.
-                "date: ".`date \"+20%y-%m-%d\"`.
-                "time: ".`date +\"%H:%M:%S\"`;
+if ( ! $NOUNAME ) {
+    $machine_data = "uname: ".`uname -a`.
+        "hardware: ".`uname -m`.
+        "os: ".`uname -sr`.
+        "name: ".`uname -n`.
+        "date: ".`date \"+20%y-%m-%d\"`.
+        "time: ".`date +\"%H:%M:%S\"`;
+} else {
+    $machine_data = "uname: (excluded)\n".
+        "hardware: ".`uname -m`.
+        "os: ".`uname -sr`.
+        "name: $nickname\n".
+        "date: ".`date \"+20%y-%m-%d\"`.
+        "time: ".`date +\"%H:%M:%S\"`;
+}
 
 # Get gcc version.
 my $gcc_version_long = "";
diff --git a/utils/TableGen/AsmMatcherEmitter.cpp b/utils/TableGen/AsmMatcherEmitter.cpp
index b823e57..e5c068b 100644
--- a/utils/TableGen/AsmMatcherEmitter.cpp
+++ b/utils/TableGen/AsmMatcherEmitter.cpp
@@ -844,19 +844,20 @@ void AsmMatcherInfo::BuildInfo(CodeGenTarget &Target) {
   // Parse the instructions; we need to do this first so that we can gather the
   // singleton register classes.
   std::set<std::string> SingletonRegisterNames;
-  for (std::map<std::string, CodeGenInstruction>::const_iterator 
-         it = Target.getInstructions().begin(), 
-         ie = Target.getInstructions().end(); 
-       it != ie; ++it) {
-    const CodeGenInstruction &CGI = it->second;
+  
+  const std::vector<const CodeGenInstruction*> &InstrList =
+    Target.getInstructionsByEnumValue();
+  
+  for (unsigned i = 0, e = InstrList.size(); i != e; ++i) {
+    const CodeGenInstruction &CGI = *InstrList[i];
 
-    if (!StringRef(it->first).startswith(MatchPrefix))
+    if (!StringRef(CGI.TheDef->getName()).startswith(MatchPrefix))
       continue;
 
-    OwningPtr<InstructionInfo> II(new InstructionInfo);
+    OwningPtr<InstructionInfo> II(new InstructionInfo());
     
-    II->InstrName = it->first;
-    II->Instr = &it->second;
+    II->InstrName = CGI.TheDef->getName();
+    II->Instr = &CGI;
     II->AsmString = FlattenVariants(CGI.AsmString, 0);
 
     // Remove comments from the asm string.
@@ -869,7 +870,7 @@ void AsmMatcherInfo::BuildInfo(CodeGenTarget &Target) {
     TokenizeAsmString(II->AsmString, II->Tokens);
 
     // Ignore instructions which shouldn't be matched.
-    if (!IsAssemblerInstruction(it->first, CGI, II->Tokens))
+    if (!IsAssemblerInstruction(CGI.TheDef->getName(), CGI, II->Tokens))
       continue;
 
     // Collect singleton registers, if used.
@@ -998,7 +999,7 @@ static void EmitConvertToMCInst(CodeGenTarget &Target,
 
   // Start the unified conversion function.
 
-  CvtOS << "static bool ConvertToMCInst(ConversionKind Kind, MCInst &Inst, "
+  CvtOS << "static void ConvertToMCInst(ConversionKind Kind, MCInst &Inst, "
         << "unsigned Opcode,\n"
         << "                      const SmallVectorImpl<MCParsedAsmOperand*"
         << "> &Operands) {\n";
@@ -1155,13 +1156,12 @@ static void EmitConvertToMCInst(CodeGenTarget &Target,
       }
     }
 
-    CvtOS << "    break;\n";
+    CvtOS << "    return;\n";
   }
 
   // Finish the convert function.
 
   CvtOS << "  }\n";
-  CvtOS << "  return false;\n";
   CvtOS << "}\n\n";
 
   // Finish the enum, and drop the convert function after it.
@@ -1634,8 +1634,15 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
     OS << "      continue;\n";
   }
   OS << "\n";
-  OS << "    return ConvertToMCInst(it->ConvertFn, Inst, "
-     << "it->Opcode, Operands);\n";
+  OS << "    ConvertToMCInst(it->ConvertFn, Inst, it->Opcode, Operands);\n";
+
+  // Call the post-processing function, if used.
+  std::string InsnCleanupFn =
+    AsmParser->getValueAsString("AsmParserInstCleanup");
+  if (!InsnCleanupFn.empty())
+    OS << "    " << InsnCleanupFn << "(Inst);\n";
+
+  OS << "    return false;\n";
   OS << "  }\n\n";
 
   OS << "  return true;\n";
diff --git a/utils/TableGen/AsmWriterEmitter.cpp b/utils/TableGen/AsmWriterEmitter.cpp
index 3a38dd4..ab1e239 100644
--- a/utils/TableGen/AsmWriterEmitter.cpp
+++ b/utils/TableGen/AsmWriterEmitter.cpp
@@ -254,16 +254,16 @@ void AsmWriterEmitter::EmitPrintInstruction(raw_ostream &O) {
 
   for (CodeGenTarget::inst_iterator I = Target.inst_begin(),
          E = Target.inst_end(); I != E; ++I)
-    if (!I->second.AsmString.empty() &&
-        I->second.TheDef->getName() != "PHI")
+    if (!(*I)->AsmString.empty() &&
+        (*I)->TheDef->getName() != "PHI")
       Instructions.push_back(
-        AsmWriterInst(I->second, 
+        AsmWriterInst(**I, 
                       AsmWriter->getValueAsInt("Variant"),
                       AsmWriter->getValueAsInt("FirstOperandColumn"),
                       AsmWriter->getValueAsInt("OperandSpacing")));
 
   // Get the instruction numbering.
-  Target.getInstructionsByEnumValue(NumberedInstructions);
+  NumberedInstructions = Target.getInstructionsByEnumValue();
   
   // Compute the CodeGenInstruction -> AsmWriterInst mapping.  Note that not
   // all machine instructions are necessarily being printed, so there may be
@@ -499,8 +499,8 @@ void AsmWriterEmitter::EmitGetInstructionName(raw_ostream &O) {
   Record *AsmWriter = Target.getAsmWriter();
   std::string ClassName = AsmWriter->getValueAsString("AsmWriterClassName");
 
-  std::vector<const CodeGenInstruction*> NumberedInstructions;
-  Target.getInstructionsByEnumValue(NumberedInstructions);
+  const std::vector<const CodeGenInstruction*> &NumberedInstructions =
+    Target.getInstructionsByEnumValue();
   
   StringToOffsetTable StringTable;
   O <<
diff --git a/utils/TableGen/ClangDiagnosticsEmitter.cpp b/utils/TableGen/ClangDiagnosticsEmitter.cpp
index 6f1080e..27b1654 100644
--- a/utils/TableGen/ClangDiagnosticsEmitter.cpp
+++ b/utils/TableGen/ClangDiagnosticsEmitter.cpp
@@ -34,7 +34,7 @@ void ClangDiagsDefsEmitter::run(raw_ostream &OS) {
     OS << "__" << ComponentName << "START = DIAG_START_" << ComponentName
        << ",\n";
     OS << "#undef " << ComponentName << "START\n";
-    OS << "#endif\n";
+    OS << "#endif\n\n";
   }
 
   const std::vector<Record*> &Diags =
diff --git a/utils/TableGen/CodeEmitterGen.cpp b/utils/TableGen/CodeEmitterGen.cpp
index f1857f5..641c224 100644
--- a/utils/TableGen/CodeEmitterGen.cpp
+++ b/utils/TableGen/CodeEmitterGen.cpp
@@ -86,8 +86,8 @@ void CodeEmitterGen::run(raw_ostream &o) {
   EmitSourceFileHeader("Machine Code Emitter", o);
   std::string Namespace = Insts[0]->getValueAsString("Namespace") + "::";
   
-  std::vector<const CodeGenInstruction*> NumberedInstructions;
-  Target.getInstructionsByEnumValue(NumberedInstructions);
+  const std::vector<const CodeGenInstruction*> &NumberedInstructions =
+    Target.getInstructionsByEnumValue();
 
   // Emit function declaration
   o << "unsigned " << Target.getName() << "CodeEmitter::"
@@ -95,7 +95,7 @@ void CodeEmitterGen::run(raw_ostream &o) {
 
   // Emit instruction base values
   o << "  static const unsigned InstBits[] = {\n";
-  for (std::vector<const CodeGenInstruction*>::iterator
+  for (std::vector<const CodeGenInstruction*>::const_iterator
           IN = NumberedInstructions.begin(),
           EN = NumberedInstructions.end();
        IN != EN; ++IN) {
@@ -156,7 +156,7 @@ void CodeEmitterGen::run(raw_ostream &o) {
 
     BitsInit *BI = R->getValueAsBitsInit("Inst");
     const std::vector<RecordVal> &Vals = R->getValues();
-    CodeGenInstruction &CGI = Target.getInstruction(InstName);
+    CodeGenInstruction &CGI = Target.getInstruction(R);
     
     // Loop over all of the fields in the instruction, determining which are the
     // operands to the instruction.
diff --git a/utils/TableGen/CodeGenDAGPatterns.cpp b/utils/TableGen/CodeGenDAGPatterns.cpp
index 6e894a4..4cc9b79 100644
--- a/utils/TableGen/CodeGenDAGPatterns.cpp
+++ b/utils/TableGen/CodeGenDAGPatterns.cpp
@@ -25,19 +25,18 @@ using namespace llvm;
 //  EEVT::TypeSet Implementation
 //===----------------------------------------------------------------------===//
 
-// FIXME: Remove EEVT::isUnknown!
-
 static inline bool isInteger(MVT::SimpleValueType VT) {
   return EVT(VT).isInteger();
 }
-
 static inline bool isFloatingPoint(MVT::SimpleValueType VT) {
   return EVT(VT).isFloatingPoint();
 }
-
 static inline bool isVector(MVT::SimpleValueType VT) {
   return EVT(VT).isVector();
 }
+static inline bool isScalar(MVT::SimpleValueType VT) {
+  return !EVT(VT).isVector();
+}
 
 EEVT::TypeSet::TypeSet(MVT::SimpleValueType VT, TreePattern &TP) {
   if (VT == MVT::iAny)
@@ -67,6 +66,32 @@ EEVT::TypeSet::TypeSet(const std::vector<MVT::SimpleValueType> &VTList) {
   TypeVec.erase(std::unique(TypeVec.begin(), TypeVec.end()), TypeVec.end());
 }
 
+/// FillWithPossibleTypes - Set to all legal types and return true, only valid
+/// on completely unknown type sets.
+bool EEVT::TypeSet::FillWithPossibleTypes(TreePattern &TP,
+                                          bool (*Pred)(MVT::SimpleValueType),
+                                          const char *PredicateName) {
+  assert(isCompletelyUnknown());
+  const std::vector<MVT::SimpleValueType> &LegalTypes = 
+    TP.getDAGPatterns().getTargetInfo().getLegalValueTypes();
+  
+  for (unsigned i = 0, e = LegalTypes.size(); i != e; ++i)
+    if (Pred == 0 || Pred(LegalTypes[i]))
+      TypeVec.push_back(LegalTypes[i]);
+
+  // If we have nothing that matches the predicate, bail out.
+  if (TypeVec.empty())
+    TP.error("Type inference contradiction found, no " +
+             std::string(PredicateName) + " types found");  
+  // No need to sort with one element.
+  if (TypeVec.size() == 1) return true;
+
+  // Remove duplicates.
+  array_pod_sort(TypeVec.begin(), TypeVec.end());
+  TypeVec.erase(std::unique(TypeVec.begin(), TypeVec.end()), TypeVec.end());
+  
+  return true;
+}
 
 /// hasIntegerTypes - Return true if this TypeSet contains iAny or an
 /// integer value type.
@@ -97,7 +122,7 @@ bool EEVT::TypeSet::hasVectorTypes() const {
 
 
 std::string EEVT::TypeSet::getName() const {
-  if (TypeVec.empty()) return "isUnknown";
+  if (TypeVec.empty()) return "<empty>";
   
   std::string Result;
     
@@ -200,94 +225,84 @@ bool EEVT::TypeSet::MergeInTypeInfo(const EEVT::TypeSet &InVT, TreePattern &TP){
 
 /// EnforceInteger - Remove all non-integer types from this set.
 bool EEVT::TypeSet::EnforceInteger(TreePattern &TP) {
-  TypeSet InputSet(*this);
-  bool MadeChange = false;
-  
   // If we know nothing, then get the full set.
-  if (TypeVec.empty()) {
-    *this = TP.getDAGPatterns().getTargetInfo().getLegalValueTypes();
-    MadeChange = true;
-  }
-  
+  if (TypeVec.empty())
+    return FillWithPossibleTypes(TP, isInteger, "integer");
   if (!hasFloatingPointTypes())
-    return MadeChange;
+    return false;
+
+  TypeSet InputSet(*this);
   
   // Filter out all the fp types.
   for (unsigned i = 0; i != TypeVec.size(); ++i)
-    if (isFloatingPoint(TypeVec[i]))
+    if (!isInteger(TypeVec[i]))
       TypeVec.erase(TypeVec.begin()+i--);
   
   if (TypeVec.empty())
     TP.error("Type inference contradiction found, '" +
              InputSet.getName() + "' needs to be integer");
-  return MadeChange;
+  return true;
 }
 
 /// EnforceFloatingPoint - Remove all integer types from this set.
 bool EEVT::TypeSet::EnforceFloatingPoint(TreePattern &TP) {
-  TypeSet InputSet(*this);
-  bool MadeChange = false;
-  
   // If we know nothing, then get the full set.
-  if (TypeVec.empty()) {
-    *this = TP.getDAGPatterns().getTargetInfo().getLegalValueTypes();
-    MadeChange = true;
-  }
-  
+  if (TypeVec.empty())
+    return FillWithPossibleTypes(TP, isFloatingPoint, "floating point");
+
   if (!hasIntegerTypes())
-    return MadeChange;
+    return false;
+
+  TypeSet InputSet(*this);
   
   // Filter out all the fp types.
   for (unsigned i = 0; i != TypeVec.size(); ++i)
-    if (isInteger(TypeVec[i]))
+    if (!isFloatingPoint(TypeVec[i]))
       TypeVec.erase(TypeVec.begin()+i--);
   
   if (TypeVec.empty())
     TP.error("Type inference contradiction found, '" +
              InputSet.getName() + "' needs to be floating point");
-  return MadeChange;
+  return true;
 }
 
 /// EnforceScalar - Remove all vector types from this.
 bool EEVT::TypeSet::EnforceScalar(TreePattern &TP) {
-  TypeSet InputSet(*this);
-  bool MadeChange = false;
-  
   // If we know nothing, then get the full set.
-  if (TypeVec.empty()) {
-    *this = TP.getDAGPatterns().getTargetInfo().getLegalValueTypes();
-    MadeChange = true;
-  }
-  
+  if (TypeVec.empty())
+    return FillWithPossibleTypes(TP, isScalar, "scalar");
+
   if (!hasVectorTypes())
-    return MadeChange;
+    return false;
+
+  TypeSet InputSet(*this);
   
   // Filter out all the vector types.
   for (unsigned i = 0; i != TypeVec.size(); ++i)
-    if (isVector(TypeVec[i]))
+    if (!isScalar(TypeVec[i]))
       TypeVec.erase(TypeVec.begin()+i--);
   
   if (TypeVec.empty())
     TP.error("Type inference contradiction found, '" +
              InputSet.getName() + "' needs to be scalar");
-  return MadeChange;
+  return true;
 }
 
 /// EnforceVector - Remove all vector types from this.
 bool EEVT::TypeSet::EnforceVector(TreePattern &TP) {
+  // If we know nothing, then get the full set.
+  if (TypeVec.empty())
+    return FillWithPossibleTypes(TP, isVector, "vector");
+
   TypeSet InputSet(*this);
   bool MadeChange = false;
   
-  // If we know nothing, then get the full set.
-  if (TypeVec.empty()) {
-    *this = TP.getDAGPatterns().getTargetInfo().getLegalValueTypes();
-    MadeChange = true;
-  }
-  
   // Filter out all the scalar types.
   for (unsigned i = 0; i != TypeVec.size(); ++i)
-    if (!isVector(TypeVec[i]))
+    if (!isVector(TypeVec[i])) {
       TypeVec.erase(TypeVec.begin()+i--);
+      MadeChange = true;
+    }
   
   if (TypeVec.empty())
     TP.error("Type inference contradiction found, '" +
@@ -296,72 +311,86 @@ bool EEVT::TypeSet::EnforceVector(TreePattern &TP) {
 }
 
 
+
 /// EnforceSmallerThan - 'this' must be a smaller VT than Other.  Update
 /// this an other based on this information.
 bool EEVT::TypeSet::EnforceSmallerThan(EEVT::TypeSet &Other, TreePattern &TP) {
   // Both operands must be integer or FP, but we don't care which.
   bool MadeChange = false;
   
-  // This code does not currently handle nodes which have multiple types,
-  // where some types are integer, and some are fp.  Assert that this is not
-  // the case.
-  assert(!(hasIntegerTypes() && hasFloatingPointTypes()) &&
-         !(Other.hasIntegerTypes() && Other.hasFloatingPointTypes()) &&
-         "SDTCisOpSmallerThanOp does not handle mixed int/fp types!");
+  if (isCompletelyUnknown())
+    MadeChange = FillWithPossibleTypes(TP);
+
+  if (Other.isCompletelyUnknown())
+    MadeChange = Other.FillWithPossibleTypes(TP);
+    
   // If one side is known to be integer or known to be FP but the other side has
   // no information, get at least the type integrality info in there.
-  if (hasIntegerTypes())
+  if (!hasFloatingPointTypes())
     MadeChange |= Other.EnforceInteger(TP);
-  else if (hasFloatingPointTypes())
+  else if (!hasIntegerTypes())
     MadeChange |= Other.EnforceFloatingPoint(TP);
-  if (Other.hasIntegerTypes())
+  if (!Other.hasFloatingPointTypes())
     MadeChange |= EnforceInteger(TP);
-  else if (Other.hasFloatingPointTypes())
+  else if (!Other.hasIntegerTypes())
     MadeChange |= EnforceFloatingPoint(TP);
   
   assert(!isCompletelyUnknown() && !Other.isCompletelyUnknown() &&
          "Should have a type list now");
   
   // If one contains vectors but the other doesn't pull vectors out.
-  if (!hasVectorTypes() && Other.hasVectorTypes())
+  if (!hasVectorTypes())
     MadeChange |= Other.EnforceScalar(TP);
-  if (hasVectorTypes() && !Other.hasVectorTypes())
+  if (!hasVectorTypes())
     MadeChange |= EnforceScalar(TP);
   
-  // FIXME: This is a bone-headed way to do this.
+  // This code does not currently handle nodes which have multiple types,
+  // where some types are integer, and some are fp.  Assert that this is not
+  // the case.
+  assert(!(hasIntegerTypes() && hasFloatingPointTypes()) &&
+         !(Other.hasIntegerTypes() && Other.hasFloatingPointTypes()) &&
+         "SDTCisOpSmallerThanOp does not handle mixed int/fp types!");
   
-  // Get the set of legal VTs and filter it based on the known integrality.
-  const CodeGenTarget &CGT = TP.getDAGPatterns().getTargetInfo();
-  TypeSet LegalVTs = CGT.getLegalValueTypes();
-
-  // TODO: If one or the other side is known to be a specific VT, we could prune
-  // LegalVTs.
-  if (hasIntegerTypes())
-    LegalVTs.EnforceInteger(TP);
-  else if (hasFloatingPointTypes())
-    LegalVTs.EnforceFloatingPoint(TP);
-  else
-    return MadeChange;
+  // Okay, find the smallest type from the current set and remove it from the
+  // largest set.
+  MVT::SimpleValueType Smallest = TypeVec[0];
+  for (unsigned i = 1, e = TypeVec.size(); i != e; ++i)
+    if (TypeVec[i] < Smallest)
+      Smallest = TypeVec[i];
   
-  switch (LegalVTs.TypeVec.size()) {
-  case 0: assert(0 && "No legal VTs?");
-  default:         // Too many VT's to pick from.
-    // TODO: If the biggest type in LegalVTs is in this set, we could remove it.
-    // If one or the other side is known to be a specific VT, we could prune
-    // LegalVTs.
-    return MadeChange;
-  case 1: 
-    // Only one VT of this flavor.  Cannot ever satisfy the constraints.
-    return MergeInTypeInfo(MVT::Other, TP);  // throw
-  case 2:
-    // If we have exactly two possible types, the little operand must be the
-    // small one, the big operand should be the big one.  This is common with 
-    // float/double for example.
-    assert(LegalVTs.TypeVec[0] < LegalVTs.TypeVec[1] && "Should be sorted!");
-    MadeChange |= MergeInTypeInfo(LegalVTs.TypeVec[0], TP);
-    MadeChange |= Other.MergeInTypeInfo(LegalVTs.TypeVec[1], TP);
-    return MadeChange;
-  }    
+  // If this is the only type in the large set, the constraint can never be
+  // satisfied.
+  if (Other.TypeVec.size() == 1 && Other.TypeVec[0] == Smallest)
+    TP.error("Type inference contradiction found, '" +
+             Other.getName() + "' has nothing larger than '" + getName() +"'!");
+  
+  SmallVector<MVT::SimpleValueType, 2>::iterator TVI =
+    std::find(Other.TypeVec.begin(), Other.TypeVec.end(), Smallest);
+  if (TVI != Other.TypeVec.end()) {
+    Other.TypeVec.erase(TVI);
+    MadeChange = true;
+  }
+  
+  // Okay, find the largest type in the Other set and remove it from the
+  // current set.
+  MVT::SimpleValueType Largest = Other.TypeVec[0];
+  for (unsigned i = 1, e = Other.TypeVec.size(); i != e; ++i)
+    if (Other.TypeVec[i] > Largest)
+      Largest = Other.TypeVec[i];
+  
+  // If this is the only type in the small set, the constraint can never be
+  // satisfied.
+  if (TypeVec.size() == 1 && TypeVec[0] == Largest)
+    TP.error("Type inference contradiction found, '" +
+             getName() + "' has nothing smaller than '" + Other.getName()+"'!");
+  
+  TVI = std::find(TypeVec.begin(), TypeVec.end(), Largest);
+  if (TVI != TypeVec.end()) {
+    TypeVec.erase(TVI);
+    MadeChange = true;
+  }
+  
+  return MadeChange;
 }
 
 /// EnforceVectorEltTypeIs - 'this' is now constrainted to be a vector type
@@ -372,10 +401,8 @@ bool EEVT::TypeSet::EnforceVectorEltTypeIs(MVT::SimpleValueType VT,
   bool MadeChange = false;
   
   // If we know nothing, then get the full set.
-  if (TypeVec.empty()) {
-    *this = TP.getDAGPatterns().getTargetInfo().getLegalValueTypes();
-    MadeChange = true;
-  }
+  if (TypeVec.empty())
+    MadeChange = FillWithPossibleTypes(TP, isVector, "vector");
   
   // Filter out all the non-vector types and types which don't have the right
   // element type.
@@ -511,24 +538,27 @@ SDTypeConstraint::SDTypeConstraint(Record *R) {
 }
 
 /// getOperandNum - Return the node corresponding to operand #OpNo in tree
-/// N, which has NumResults results.
-TreePatternNode *SDTypeConstraint::getOperandNum(unsigned OpNo,
-                                                 TreePatternNode *N,
-                                                 unsigned NumResults) const {
-  assert(NumResults <= 1 &&
-         "We only work with nodes with zero or one result so far!");
+/// N, and the result number in ResNo.
+static TreePatternNode *getOperandNum(unsigned OpNo, TreePatternNode *N,
+                                      const SDNodeInfo &NodeInfo,
+                                      unsigned &ResNo) {
+  unsigned NumResults = NodeInfo.getNumResults();
+  if (OpNo < NumResults) {
+    ResNo = OpNo;
+    return N;
+  }
+  
+  OpNo -= NumResults;
   
-  if (OpNo >= (NumResults + N->getNumChildren())) {
-    errs() << "Invalid operand number " << OpNo << " ";
+  if (OpNo >= N->getNumChildren()) {
+    errs() << "Invalid operand number in type constraint " 
+           << (OpNo+NumResults) << " ";
     N->dump();
     errs() << '\n';
     exit(1);
   }
 
-  if (OpNo < NumResults)
-    return N;  // FIXME: need value #
-  else
-    return N->getChild(OpNo-NumResults);
+  return N->getChild(OpNo);
 }
 
 /// ApplyTypeConstraint - Given a node in a pattern, apply this type
@@ -538,10 +568,6 @@ TreePatternNode *SDTypeConstraint::getOperandNum(unsigned OpNo,
 bool SDTypeConstraint::ApplyTypeConstraint(TreePatternNode *N,
                                            const SDNodeInfo &NodeInfo,
                                            TreePattern &TP) const {
-  unsigned NumResults = NodeInfo.getNumResults();
-  assert(NumResults <= 1 &&
-         "We only work with nodes with zero or one result so far!");
-  
   // Check that the number of operands is sane.  Negative operands -> varargs.
   if (NodeInfo.getNumOperands() >= 0) {
     if (N->getNumChildren() != (unsigned)NodeInfo.getNumOperands())
@@ -549,30 +575,32 @@ bool SDTypeConstraint::ApplyTypeConstraint(TreePatternNode *N,
                itostr(NodeInfo.getNumOperands()) + " operands!");
   }
 
-  TreePatternNode *NodeToApply = getOperandNum(OperandNo, N, NumResults);
+  unsigned ResNo = 0; // The result number being referenced.
+  TreePatternNode *NodeToApply = getOperandNum(OperandNo, N, NodeInfo, ResNo);
   
   switch (ConstraintType) {
   default: assert(0 && "Unknown constraint type!");
   case SDTCisVT:
     // Operand must be a particular type.
-    return NodeToApply->UpdateNodeType(x.SDTCisVT_Info.VT, TP);
+    return NodeToApply->UpdateNodeType(ResNo, x.SDTCisVT_Info.VT, TP);
   case SDTCisPtrTy:
     // Operand must be same as target pointer type.
-    return NodeToApply->UpdateNodeType(MVT::iPTR, TP);
+    return NodeToApply->UpdateNodeType(ResNo, MVT::iPTR, TP);
   case SDTCisInt:
     // Require it to be one of the legal integer VTs.
-    return NodeToApply->getExtType().EnforceInteger(TP);
+    return NodeToApply->getExtType(ResNo).EnforceInteger(TP);
   case SDTCisFP:
     // Require it to be one of the legal fp VTs.
-    return NodeToApply->getExtType().EnforceFloatingPoint(TP);
+    return NodeToApply->getExtType(ResNo).EnforceFloatingPoint(TP);
   case SDTCisVec:
     // Require it to be one of the legal vector VTs.
-    return NodeToApply->getExtType().EnforceVector(TP);
+    return NodeToApply->getExtType(ResNo).EnforceVector(TP);
   case SDTCisSameAs: {
+    unsigned OResNo = 0;
     TreePatternNode *OtherNode =
-      getOperandNum(x.SDTCisSameAs_Info.OtherOperandNum, N, NumResults);
-    return NodeToApply->UpdateNodeType(OtherNode->getExtType(), TP) |
-           OtherNode->UpdateNodeType(NodeToApply->getExtType(), TP);
+      getOperandNum(x.SDTCisSameAs_Info.OtherOperandNum, N, NodeInfo, OResNo);
+    return NodeToApply->UpdateNodeType(OResNo, OtherNode->getExtType(ResNo),TP)|
+           OtherNode->UpdateNodeType(ResNo,NodeToApply->getExtType(OResNo),TP);
   }
   case SDTCisVTSmallerThanOp: {
     // The NodeToApply must be a leaf node that is a VT.  OtherOperandNum must
@@ -587,40 +615,47 @@ bool SDTypeConstraint::ApplyTypeConstraint(TreePatternNode *N,
     if (!isInteger(VT))
       TP.error(N->getOperator()->getName() + " VT operand must be integer!");
     
+    unsigned OResNo = 0;
     TreePatternNode *OtherNode =
-      getOperandNum(x.SDTCisVTSmallerThanOp_Info.OtherOperandNum, N,NumResults);
+      getOperandNum(x.SDTCisVTSmallerThanOp_Info.OtherOperandNum, N, NodeInfo,
+                    OResNo);
     
     // It must be integer.
-    bool MadeChange = OtherNode->getExtType().EnforceInteger(TP);
+    bool MadeChange = OtherNode->getExtType(OResNo).EnforceInteger(TP);
 
     // This doesn't try to enforce any information on the OtherNode, it just
     // validates it when information is determined.
-    if (OtherNode->hasTypeSet() && OtherNode->getType() <= VT)
-      OtherNode->UpdateNodeType(MVT::Other, TP);  // Throw an error.
+    if (OtherNode->hasTypeSet(OResNo) && OtherNode->getType(OResNo) <= VT)
+      OtherNode->UpdateNodeType(OResNo, MVT::Other, TP);  // Throw an error.
     return MadeChange;
   }
   case SDTCisOpSmallerThanOp: {
+    unsigned BResNo = 0;
     TreePatternNode *BigOperand =
-      getOperandNum(x.SDTCisOpSmallerThanOp_Info.BigOperandNum, N, NumResults);
-    return NodeToApply->getExtType().
-                  EnforceSmallerThan(BigOperand->getExtType(), TP);
+      getOperandNum(x.SDTCisOpSmallerThanOp_Info.BigOperandNum, N, NodeInfo,
+                    BResNo);
+    return NodeToApply->getExtType(ResNo).
+                  EnforceSmallerThan(BigOperand->getExtType(BResNo), TP);
   }
   case SDTCisEltOfVec: {
+    unsigned VResNo = 0;
     TreePatternNode *VecOperand =
-      getOperandNum(x.SDTCisEltOfVec_Info.OtherOperandNum, N, NumResults);
-    if (VecOperand->hasTypeSet()) {
-      if (!isVector(VecOperand->getType()))
+      getOperandNum(x.SDTCisEltOfVec_Info.OtherOperandNum, N, NodeInfo,
+                    VResNo);
+    if (VecOperand->hasTypeSet(VResNo)) {
+      if (!isVector(VecOperand->getType(VResNo)))
         TP.error(N->getOperator()->getName() + " VT operand must be a vector!");
-      EVT IVT = VecOperand->getType();
+      EVT IVT = VecOperand->getType(VResNo);
       IVT = IVT.getVectorElementType();
-      return NodeToApply->UpdateNodeType(IVT.getSimpleVT().SimpleTy, TP);
+      return NodeToApply->UpdateNodeType(ResNo, IVT.getSimpleVT().SimpleTy, TP);
     }
     
-    if (NodeToApply->hasTypeSet() && VecOperand->getExtType().hasVectorTypes()){
+    if (NodeToApply->hasTypeSet(ResNo) &&
+        VecOperand->getExtType(VResNo).hasVectorTypes()){
       // Filter vector types out of VecOperand that don't have the right element
       // type.
-      return VecOperand->getExtType().
-        EnforceVectorEltTypeIs(NodeToApply->getType(), TP);
+      return VecOperand->getExtType(VResNo).
+        EnforceVectorEltTypeIs(NodeToApply->getType(ResNo), TP);
     }
     return false;
   }
@@ -662,6 +697,8 @@ SDNodeInfo::SDNodeInfo(Record *R) : Def(R) {
       Properties |= 1 << SDNPSideEffect;
     } else if (PropList[i]->getName() == "SDNPMemOperand") {
       Properties |= 1 << SDNPMemOperand;
+    } else if (PropList[i]->getName() == "SDNPVariadic") {
+      Properties |= 1 << SDNPVariadic;
     } else {
       errs() << "Unknown SD Node property '" << PropList[i]->getName()
              << "' on node '" << R->getName() << "'!\n";
@@ -678,8 +715,8 @@ SDNodeInfo::SDNodeInfo(Record *R) : Def(R) {
 
 /// getKnownType - If the type constraints on this node imply a fixed type
 /// (e.g. all stores return void, etc), then return it as an
-/// MVT::SimpleValueType.  Otherwise, return EEVT::isUnknown.
-unsigned SDNodeInfo::getKnownType() const {
+/// MVT::SimpleValueType.  Otherwise, return EEVT::Other.
+MVT::SimpleValueType SDNodeInfo::getKnownType() const {
   unsigned NumResults = getNumResults();
   assert(NumResults <= 1 &&
          "We only work with nodes with zero or one result so far!");
@@ -697,7 +734,7 @@ unsigned SDNodeInfo::getKnownType() const {
       return MVT::iPTR;
     }
   }
-  return EEVT::isUnknown;
+  return MVT::Other;
 }
 
 //===----------------------------------------------------------------------===//
@@ -711,17 +748,73 @@ TreePatternNode::~TreePatternNode() {
 #endif
 }
 
-
+static unsigned GetNumNodeResults(Record *Operator, CodeGenDAGPatterns &CDP) {
+  if (Operator->getName() == "set" ||
+      Operator->getName() == "implicit" ||
+      Operator->getName() == "parallel")
+    return 0;  // All return nothing.
+  
+  if (Operator->isSubClassOf("Intrinsic")) {
+    unsigned NumRes = CDP.getIntrinsic(Operator).IS.RetVTs.size();
+    if (NumRes == 1 && CDP.getIntrinsic(Operator).IS.RetVTs[0] == MVT::isVoid)
+      return 0;
+    return NumRes;
+  }
+  
+  if (Operator->isSubClassOf("SDNode"))
+    return CDP.getSDNodeInfo(Operator).getNumResults();
+  
+  if (Operator->isSubClassOf("PatFrag")) {
+    // If we've already parsed this pattern fragment, get it.  Otherwise, handle
+    // the forward reference case where one pattern fragment references another
+    // before it is processed.
+    if (TreePattern *PFRec = CDP.getPatternFragmentIfRead(Operator))
+      return PFRec->getOnlyTree()->getNumTypes();
+    
+    // Get the result tree.
+    DagInit *Tree = Operator->getValueAsDag("Fragment");
+    Record *Op = 0;
+    if (Tree && dynamic_cast<DefInit*>(Tree->getOperator()))
+      Op = dynamic_cast<DefInit*>(Tree->getOperator())->getDef();
+    assert(Op && "Invalid Fragment");
+    return GetNumNodeResults(Op, CDP);
+  }
+  
+  if (Operator->isSubClassOf("Instruction")) {
+    CodeGenInstruction &InstInfo = CDP.getTargetInfo().getInstruction(Operator);
+    
+    // FIXME: Handle implicit defs right.
+    if (InstInfo.NumDefs != 0)
+      return 1;     // FIXME: Handle inst results right!
+    
+    if (!InstInfo.ImplicitDefs.empty()) {
+      // Add on one implicit def if it has a resolvable type.
+      Record *FirstImplicitDef = InstInfo.ImplicitDefs[0];
+      assert(FirstImplicitDef->isSubClassOf("Register"));
+      const std::vector<MVT::SimpleValueType> &RegVTs = 
+      CDP.getTargetInfo().getRegisterVTs(FirstImplicitDef);
+      if (RegVTs.size() == 1)
+        return 1;
+    }
+    return 0;
+  }
+  
+  if (Operator->isSubClassOf("SDNodeXForm"))
+    return 1;  // FIXME: Generalize SDNodeXForm
+  
+  Operator->dump();
+  errs() << "Unhandled node in GetNumNodeResults\n";
+  exit(1);
+}
 
 void TreePatternNode::print(raw_ostream &OS) const {
-  if (isLeaf()) {
+  if (isLeaf())
     OS << *getLeafValue();
-  } else {
+  else
     OS << '(' << getOperator()->getName();
-  }
-  
-  if (!isTypeCompletelyUnknown())
-    OS << ':' << getExtType().getName();
+
+  for (unsigned i = 0, e = Types.size(); i != e; ++i)
+    OS << ':' << getExtType(i).getName();
 
   if (!isLeaf()) {
     if (getNumChildren() != 0) {
@@ -757,7 +850,7 @@ void TreePatternNode::dump() const {
 bool TreePatternNode::isIsomorphicTo(const TreePatternNode *N,
                                      const MultipleUseVarSet &DepVars) const {
   if (N == this) return true;
-  if (N->isLeaf() != isLeaf() || getExtType() != N->getExtType() ||
+  if (N->isLeaf() != isLeaf() || getExtTypes() != N->getExtTypes() ||
       getPredicateFns() != N->getPredicateFns() ||
       getTransformFn() != N->getTransformFn())
     return false;
@@ -786,16 +879,16 @@ bool TreePatternNode::isIsomorphicTo(const TreePatternNode *N,
 TreePatternNode *TreePatternNode::clone() const {
   TreePatternNode *New;
   if (isLeaf()) {
-    New = new TreePatternNode(getLeafValue());
+    New = new TreePatternNode(getLeafValue(), getNumTypes());
   } else {
     std::vector<TreePatternNode*> CChildren;
     CChildren.reserve(Children.size());
     for (unsigned i = 0, e = getNumChildren(); i != e; ++i)
       CChildren.push_back(getChild(i)->clone());
-    New = new TreePatternNode(getOperator(), CChildren);
+    New = new TreePatternNode(getOperator(), CChildren, getNumTypes());
   }
   New->setName(getName());
-  New->setType(getExtType());
+  New->Types = Types;
   New->setPredicateFns(getPredicateFns());
   New->setTransformFn(getTransformFn());
   return New;
@@ -803,7 +896,8 @@ TreePatternNode *TreePatternNode::clone() const {
 
 /// RemoveAllTypes - Recursively strip all the types of this tree.
 void TreePatternNode::RemoveAllTypes() {
-  setType(EEVT::TypeSet());  // Reset to unknown type.
+  for (unsigned i = 0, e = Types.size(); i != e; ++i)
+    Types[i] = EEVT::TypeSet();  // Reset to unknown type.
   if (isLeaf()) return;
   for (unsigned i = 0, e = getNumChildren(); i != e; ++i)
     getChild(i)->RemoveAllTypes();
@@ -885,7 +979,8 @@ TreePatternNode *TreePatternNode::InlinePatternFragments(TreePattern &TP) {
   }
   
   FragTree->setName(getName());
-  FragTree->UpdateNodeType(getExtType(), TP);
+  for (unsigned i = 0, e = Types.size(); i != e; ++i)
+    FragTree->UpdateNodeType(i, getExtType(i), TP);
 
   // Transfer in the old predicates.
   for (unsigned i = 0, e = getPredicateFns().size(); i != e; ++i)
@@ -903,8 +998,10 @@ TreePatternNode *TreePatternNode::InlinePatternFragments(TreePattern &TP) {
 /// type which should be applied to it.  This will infer the type of register
 /// references from the register file information, for example.
 ///
-static EEVT::TypeSet getImplicitType(Record *R, bool NotRegisters,
-                                     TreePattern &TP) {
+static EEVT::TypeSet getImplicitType(Record *R, unsigned ResNo,
+                                     bool NotRegisters, TreePattern &TP) {
+  assert(ResNo == 0 && "FIXME: Unhandled result number");
+  
   // Check to see if this is a register or a register class.
   if (R->isSubClassOf("RegisterClass")) {
     if (NotRegisters) 
@@ -1015,17 +1112,23 @@ bool TreePatternNode::ApplyTypeConstraints(TreePattern &TP, bool NotRegisters) {
   if (isLeaf()) {
     if (DefInit *DI = dynamic_cast<DefInit*>(getLeafValue())) {
       // If it's a regclass or something else known, include the type.
-      return UpdateNodeType(getImplicitType(DI->getDef(), NotRegisters, TP),TP);
+      bool MadeChange = false;
+      for (unsigned i = 0, e = Types.size(); i != e; ++i)
+        MadeChange |= UpdateNodeType(i, getImplicitType(DI->getDef(), i,
+                                                        NotRegisters, TP), TP);
+      return MadeChange;
     }
     
     if (IntInit *II = dynamic_cast<IntInit*>(getLeafValue())) {
+      assert(Types.size() == 1 && "Invalid IntInit");
+      
       // Int inits are always integers. :)
-      bool MadeChange = Type.EnforceInteger(TP);
+      bool MadeChange = Types[0].EnforceInteger(TP);
       
-      if (!hasTypeSet())
+      if (!Types[0].isConcrete())
         return MadeChange;
       
-      MVT::SimpleValueType VT = getType();
+      MVT::SimpleValueType VT = getType(0);
       if (VT == MVT::iPTR || VT == MVT::iPTRAny)
         return MadeChange;
       
@@ -1046,7 +1149,7 @@ bool TreePatternNode::ApplyTypeConstraints(TreePattern &TP, bool NotRegisters) {
         return MadeChange;
       
       TP.error("Integer value '" + itostr(II->getValue())+
-               "' is out of range for type '" + getEnumName(getType()) + "'!");
+               "' is out of range for type '" + getEnumName(getType(0)) + "'!");
       return MadeChange;
     }
     return false;
@@ -1054,27 +1157,31 @@ bool TreePatternNode::ApplyTypeConstraints(TreePattern &TP, bool NotRegisters) {
   
   // special handling for set, which isn't really an SDNode.
   if (getOperator()->getName() == "set") {
-    assert (getNumChildren() >= 2 && "Missing RHS of a set?");
+    assert(getNumTypes() == 0 && "Set doesn't produce a value");
+    assert(getNumChildren() >= 2 && "Missing RHS of a set?");
     unsigned NC = getNumChildren();
-    bool MadeChange = false;
+    
+    TreePatternNode *SetVal = getChild(NC-1);
+    bool MadeChange = SetVal->ApplyTypeConstraints(TP, NotRegisters);
+
     for (unsigned i = 0; i < NC-1; ++i) {
-      MadeChange = getChild(i)->ApplyTypeConstraints(TP, NotRegisters);
-      MadeChange |= getChild(NC-1)->ApplyTypeConstraints(TP, NotRegisters);
+      TreePatternNode *Child = getChild(i);
+      MadeChange |= Child->ApplyTypeConstraints(TP, NotRegisters);
     
       // Types of operands must match.
-      MadeChange |=getChild(i)->UpdateNodeType(getChild(NC-1)->getExtType(),TP);
-      MadeChange |=getChild(NC-1)->UpdateNodeType(getChild(i)->getExtType(),TP);
-      MadeChange |=UpdateNodeType(MVT::isVoid, TP);
+      MadeChange |= Child->UpdateNodeType(0, SetVal->getExtType(i), TP);
+      MadeChange |= SetVal->UpdateNodeType(i, Child->getExtType(0), TP);
     }
     return MadeChange;
   }
   
   if (getOperator()->getName() == "implicit" ||
       getOperator()->getName() == "parallel") {
+    assert(getNumTypes() == 0 && "Node doesn't produce a value");
+
     bool MadeChange = false;
     for (unsigned i = 0; i < getNumChildren(); ++i)
       MadeChange = getChild(i)->ApplyTypeConstraints(TP, NotRegisters);
-    MadeChange |= UpdateNodeType(MVT::isVoid, TP);
     return MadeChange;
   }
   
@@ -1083,13 +1190,16 @@ bool TreePatternNode::ApplyTypeConstraints(TreePattern &TP, bool NotRegisters) {
     MadeChange |= getChild(0)->ApplyTypeConstraints(TP, NotRegisters);
     MadeChange |= getChild(1)->ApplyTypeConstraints(TP, NotRegisters);
     
+    assert(getChild(0)->getNumTypes() == 1 &&
+           getChild(1)->getNumTypes() == 1 && "Unhandled case");
+    
     // child #1 of COPY_TO_REGCLASS should be a register class.  We don't care
     // what type it gets, so if it didn't get a concrete type just give it the
     // first viable type from the reg class.
-    if (!getChild(1)->hasTypeSet() &&
-        !getChild(1)->getExtType().isCompletelyUnknown()) {
-      MVT::SimpleValueType RCVT = getChild(1)->getExtType().getTypeList()[0];
-      MadeChange |= getChild(1)->UpdateNodeType(RCVT, TP);
+    if (!getChild(1)->hasTypeSet(0) &&
+        !getChild(1)->getExtType(0).isCompletelyUnknown()) {
+      MVT::SimpleValueType RCVT = getChild(1)->getExtType(0).getTypeList()[0];
+      MadeChange |= getChild(1)->UpdateNodeType(0, RCVT, TP);
     }
     return MadeChange;
   }
@@ -1100,22 +1210,26 @@ bool TreePatternNode::ApplyTypeConstraints(TreePattern &TP, bool NotRegisters) {
     // Apply the result type to the node.
     unsigned NumRetVTs = Int->IS.RetVTs.size();
     unsigned NumParamVTs = Int->IS.ParamVTs.size();
-
+    if (NumRetVTs == 1 && Int->IS.RetVTs[0] == MVT::isVoid)
+      NumRetVTs = 0;
+    
     for (unsigned i = 0, e = NumRetVTs; i != e; ++i)
-      MadeChange |= UpdateNodeType(Int->IS.RetVTs[i], TP);
+      MadeChange |= UpdateNodeType(i, Int->IS.RetVTs[i], TP);
 
-    if (getNumChildren() != NumParamVTs + NumRetVTs)
+    if (getNumChildren() != NumParamVTs + 1)
       TP.error("Intrinsic '" + Int->Name + "' expects " +
-               utostr(NumParamVTs + NumRetVTs - 1) + " operands, not " +
+               utostr(NumParamVTs) + " operands, not " +
                utostr(getNumChildren() - 1) + " operands!");
 
     // Apply type info to the intrinsic ID.
-    MadeChange |= getChild(0)->UpdateNodeType(MVT::iPTR, TP);
+    MadeChange |= getChild(0)->UpdateNodeType(0, MVT::iPTR, TP);
     
-    for (unsigned i = NumRetVTs, e = getNumChildren(); i != e; ++i) {
-      MVT::SimpleValueType OpVT = Int->IS.ParamVTs[i - NumRetVTs];
-      MadeChange |= getChild(i)->UpdateNodeType(OpVT, TP);
-      MadeChange |= getChild(i)->ApplyTypeConstraints(TP, NotRegisters);
+    for (unsigned i = 0, e = getNumChildren()-1; i != e; ++i) {
+      MadeChange |= getChild(i+1)->ApplyTypeConstraints(TP, NotRegisters);
+      
+      MVT::SimpleValueType OpVT = Int->IS.ParamVTs[i];
+      assert(getChild(i+1)->getNumTypes() == 1 && "Unhandled case");
+      MadeChange |= getChild(i+1)->UpdateNodeType(0, OpVT, TP);
     }
     return MadeChange;
   }
@@ -1126,51 +1240,60 @@ bool TreePatternNode::ApplyTypeConstraints(TreePattern &TP, bool NotRegisters) {
     bool MadeChange = NI.ApplyTypeConstraints(this, TP);
     for (unsigned i = 0, e = getNumChildren(); i != e; ++i)
       MadeChange |= getChild(i)->ApplyTypeConstraints(TP, NotRegisters);
-    // Branch, etc. do not produce results and top-level forms in instr pattern
-    // must have void types.
-    if (NI.getNumResults() == 0)
-      MadeChange |= UpdateNodeType(MVT::isVoid, TP);
-    
-    return MadeChange;  
+    return MadeChange;
   }
   
   if (getOperator()->isSubClassOf("Instruction")) {
     const DAGInstruction &Inst = CDP.getInstruction(getOperator());
-    bool MadeChange = false;
-    unsigned NumResults = Inst.getNumResults();
-    
-    assert(NumResults <= 1 &&
-           "Only supports zero or one result instrs!");
+    unsigned ResNo = 0;
+    assert(Inst.getNumResults() <= 1 &&
+           "FIXME: Only supports zero or one result instrs!");
 
     CodeGenInstruction &InstInfo =
-      CDP.getTargetInfo().getInstruction(getOperator()->getName());
+      CDP.getTargetInfo().getInstruction(getOperator());
+    
+    EEVT::TypeSet ResultType;
+    
     // Apply the result type to the node
-    if (NumResults == 0 || InstInfo.NumDefs == 0) {
-      MadeChange = UpdateNodeType(MVT::isVoid, TP);
-    } else {
+    if (InstInfo.NumDefs != 0) { // # of elements in (outs) list
       Record *ResultNode = Inst.getResult(0);
       
       if (ResultNode->isSubClassOf("PointerLikeRegClass")) {
-        MadeChange = UpdateNodeType(MVT::iPTR, TP);
+        ResultType = EEVT::TypeSet(MVT::iPTR, TP);
       } else if (ResultNode->getName() == "unknown") {
         // Nothing to do.
       } else {
         assert(ResultNode->isSubClassOf("RegisterClass") &&
                "Operands should be register classes!");
-
         const CodeGenRegisterClass &RC = 
           CDP.getTargetInfo().getRegisterClass(ResultNode);
-        MadeChange = UpdateNodeType(RC.getValueTypes(), TP);
+        ResultType = RC.getValueTypes();
       }
+    } else if (!InstInfo.ImplicitDefs.empty()) {
+      // If the instruction has implicit defs, the first one defines the result
+      // type.
+      Record *FirstImplicitDef = InstInfo.ImplicitDefs[0];
+      assert(FirstImplicitDef->isSubClassOf("Register"));
+      const std::vector<MVT::SimpleValueType> &RegVTs = 
+        CDP.getTargetInfo().getRegisterVTs(FirstImplicitDef);
+      if (RegVTs.size() == 1)   // FIXME: Generalize.
+        ResultType = EEVT::TypeSet(RegVTs);
+    } else {
+      // Otherwise, the instruction produces no value result.
     }
     
+    bool MadeChange = false;
+    
+    if (!ResultType.isCompletelyUnknown())
+      MadeChange |= UpdateNodeType(ResNo, ResultType, TP);
+    
     // If this is an INSERT_SUBREG, constrain the source and destination VTs to
     // be the same.
     if (getOperator()->getName() == "INSERT_SUBREG") {
-      MadeChange |= UpdateNodeType(getChild(0)->getExtType(), TP);
-      MadeChange |= getChild(0)->UpdateNodeType(getExtType(), TP);
+      assert(getChild(0)->getNumTypes() == 1 && "FIXME: Unhandled");
+      MadeChange |= UpdateNodeType(0, getChild(0)->getExtType(0), TP);
+      MadeChange |= getChild(0)->UpdateNodeType(0, getExtType(0), TP);
     }
-    
 
     unsigned ChildNo = 0;
     for (unsigned i = 0, e = Inst.getNumOperands(); i != e; ++i) {
@@ -1191,15 +1314,17 @@ bool TreePatternNode::ApplyTypeConstraints(TreePattern &TP, bool NotRegisters) {
       
       MVT::SimpleValueType VT;
       TreePatternNode *Child = getChild(ChildNo++);
+      assert(Child->getNumTypes() == 1 && "Unknown case?");
+      
       if (OperandNode->isSubClassOf("RegisterClass")) {
         const CodeGenRegisterClass &RC = 
           CDP.getTargetInfo().getRegisterClass(OperandNode);
-        MadeChange |= Child->UpdateNodeType(RC.getValueTypes(), TP);
+        MadeChange |= Child->UpdateNodeType(0, RC.getValueTypes(), TP);
       } else if (OperandNode->isSubClassOf("Operand")) {
         VT = getValueType(OperandNode->getValueAsDef("Type"));
-        MadeChange |= Child->UpdateNodeType(VT, TP);
+        MadeChange |= Child->UpdateNodeType(0, VT, TP);
       } else if (OperandNode->isSubClassOf("PointerLikeRegClass")) {
-        MadeChange |= Child->UpdateNodeType(MVT::iPTR, TP);
+        MadeChange |= Child->UpdateNodeType(0, MVT::iPTR, TP);
       } else if (OperandNode->getName() == "unknown") {
         // Nothing to do.
       } else {
@@ -1331,6 +1456,7 @@ void TreePattern::ComputeNamedNodes(TreePatternNode *N) {
     ComputeNamedNodes(N->getChild(i));
 }
 
+
 TreePatternNode *TreePattern::ParseTreePattern(DagInit *Dag) {
   DefInit *OpDef = dynamic_cast<DefInit*>(Dag->getOperator());
   if (!OpDef) error("Pattern has unexpected operator type!");
@@ -1359,11 +1485,11 @@ TreePatternNode *TreePattern::ParseTreePattern(DagInit *Dag) {
         Args.push_back(Dag->getArgName(0));
       }
       
-      New = new TreePatternNode(DI);
+      New = new TreePatternNode(DI, 1);
     } else if (DagInit *DI = dynamic_cast<DagInit*>(Arg)) {
       New = ParseTreePattern(DI);
     } else if (IntInit *II = dynamic_cast<IntInit*>(Arg)) {
-      New = new TreePatternNode(II);
+      New = new TreePatternNode(II, 1);
       if (!Dag->getArgName(0).empty())
         error("Constant int argument should not have a name!");
     } else if (BitsInit *BI = dynamic_cast<BitsInit*>(Arg)) {
@@ -1372,7 +1498,7 @@ TreePatternNode *TreePattern::ParseTreePattern(DagInit *Dag) {
       if (II == 0 || !dynamic_cast<IntInit*>(II))
         error("Bits value must be constants!");
       
-      New = new TreePatternNode(dynamic_cast<IntInit*>(II));
+      New = new TreePatternNode(dynamic_cast<IntInit*>(II), 1);
       if (!Dag->getArgName(0).empty())
         error("Constant int argument should not have a name!");
     } else {
@@ -1382,7 +1508,8 @@ TreePatternNode *TreePattern::ParseTreePattern(DagInit *Dag) {
     }
     
     // Apply the type cast.
-    New->UpdateNodeType(getValueType(Operator), *this);
+    assert(New->getNumTypes() == 1 && "FIXME: Unhandled");
+    New->UpdateNodeType(0, getValueType(Operator), *this);
     if (New->getNumChildren() == 0)
       New->setName(Dag->getArgName(0));
     return New;
@@ -1421,7 +1548,7 @@ TreePatternNode *TreePattern::ParseTreePattern(DagInit *Dag) {
                               std::vector<std::pair<Init*, std::string> >()));
         --i;  // Revisit this node...
       } else {
-        TreePatternNode *Node = new TreePatternNode(DefI);
+        TreePatternNode *Node = new TreePatternNode(DefI, 1);
         Node->setName(Dag->getArgName(i));
         Children.push_back(Node);
         
@@ -1433,7 +1560,7 @@ TreePatternNode *TreePattern::ParseTreePattern(DagInit *Dag) {
         }
       }
     } else if (IntInit *II = dynamic_cast<IntInit*>(Arg)) {
-      TreePatternNode *Node = new TreePatternNode(II);
+      TreePatternNode *Node = new TreePatternNode(II, 1);
       if (!Dag->getArgName(i).empty())
         error("Constant int argument should not have a name!");
       Children.push_back(Node);
@@ -1443,7 +1570,7 @@ TreePatternNode *TreePattern::ParseTreePattern(DagInit *Dag) {
       if (II == 0 || !dynamic_cast<IntInit*>(II))
         error("Bits value must be constants!");
       
-      TreePatternNode *Node = new TreePatternNode(dynamic_cast<IntInit*>(II));
+      TreePatternNode *Node = new TreePatternNode(dynamic_cast<IntInit*>(II),1);
       if (!Dag->getArgName(i).empty())
         error("Constant int argument should not have a name!");
       Children.push_back(Node);
@@ -1474,11 +1601,12 @@ TreePatternNode *TreePattern::ParseTreePattern(DagInit *Dag) {
       Operator = getDAGPatterns().get_intrinsic_wo_chain_sdnode();
     }
     
-    TreePatternNode *IIDNode = new TreePatternNode(new IntInit(IID));
+    TreePatternNode *IIDNode = new TreePatternNode(new IntInit(IID), 1);
     Children.insert(Children.begin(), IIDNode);
   }
   
-  TreePatternNode *Result = new TreePatternNode(Operator, Children);
+  unsigned NumResults = GetNumNodeResults(Operator, CDP);
+  TreePatternNode *Result = new TreePatternNode(Operator, Children, NumResults);
   Result->setName(Dag->getName());
   return Result;
 }
@@ -1525,7 +1653,11 @@ InferAllTypes(const StringMap<SmallVector<TreePatternNode*,1> > *InNamedTypes) {
               continue;
           }
           
-          MadeChange |=Nodes[i]->UpdateNodeType(InNodes[0]->getExtType(),*this);
+          assert(Nodes[i]->getNumTypes() == 1 &&
+                 InNodes[0]->getNumTypes() == 1 &&
+                 "FIXME: cannot name multiple result nodes yet");
+          MadeChange |= Nodes[i]->UpdateNodeType(0, InNodes[0]->getExtType(0),
+                                                 *this);
         }
       }
       
@@ -1533,8 +1665,12 @@ InferAllTypes(const StringMap<SmallVector<TreePatternNode*,1> > *InNamedTypes) {
       // same type.
       if (I->second.size() > 1) {
         for (unsigned i = 0, e = Nodes.size()-1; i != e; ++i) {
-          MadeChange |=Nodes[i]->UpdateNodeType(Nodes[i+1]->getExtType(),*this);
-          MadeChange |=Nodes[i+1]->UpdateNodeType(Nodes[i]->getExtType(),*this);
+          TreePatternNode *N1 = Nodes[i], *N2 = Nodes[i+1];
+          assert(N1->getNumTypes() == 1 && N2->getNumTypes() == 1 &&
+                 "FIXME: cannot name multiple result nodes yet");
+          
+          MadeChange |= N1->UpdateNodeType(0, N2->getExtType(0), *this);
+          MadeChange |= N2->UpdateNodeType(0, N1->getExtType(0), *this);
         }
       }
     }
@@ -1832,7 +1968,7 @@ static bool HandleUse(TreePattern *I, TreePatternNode *Pat,
   // Ensure that the inputs agree if we've already seen this input.
   if (Rec != SlotRec)
     I->error("All $" + Pat->getName() + " inputs must agree with each other");
-  if (Slot->getExtType() != Pat->getExtType())
+  if (Slot->getExtTypes() != Pat->getExtTypes())
     I->error("All $" + Pat->getName() + " inputs must agree with each other");
   return true;
 }
@@ -1871,7 +2007,7 @@ FindPatternInputsAndOutputs(TreePattern *I, TreePatternNode *Pat,
     // If this is not a set, verify that the children nodes are not void typed,
     // and recurse.
     for (unsigned i = 0, e = Pat->getNumChildren(); i != e; ++i) {
-      if (Pat->getChild(i)->getType() == MVT::isVoid)
+      if (Pat->getChild(i)->getNumTypes() == 0)
         I->error("Cannot have void nodes inside of patterns!");
       FindPatternInputsAndOutputs(I, Pat->getChild(i), InstInputs, InstResults,
                                   InstImpInputs, InstImpResults);
@@ -1933,10 +2069,12 @@ class InstAnalyzer {
   bool &mayStore;
   bool &mayLoad;
   bool &HasSideEffects;
+  bool &IsVariadic;
 public:
   InstAnalyzer(const CodeGenDAGPatterns &cdp,
-               bool &maystore, bool &mayload, bool &hse)
-    : CDP(cdp), mayStore(maystore), mayLoad(mayload), HasSideEffects(hse){
+               bool &maystore, bool &mayload, bool &hse, bool &isv)
+    : CDP(cdp), mayStore(maystore), mayLoad(mayload), HasSideEffects(hse),
+      IsVariadic(isv) {
   }
 
   /// Analyze - Analyze the specified instruction, returning true if the
@@ -1985,6 +2123,7 @@ private:
     if (OpInfo.hasProperty(SDNPMayStore)) mayStore = true;
     if (OpInfo.hasProperty(SDNPMayLoad)) mayLoad = true;
     if (OpInfo.hasProperty(SDNPSideEffect)) HasSideEffects = true;
+    if (OpInfo.hasProperty(SDNPVariadic)) IsVariadic = true;
 
     if (const CodeGenIntrinsic *IntInfo = N->getIntrinsicInfo(CDP)) {
       // If this is an intrinsic, analyze it.
@@ -2004,12 +2143,13 @@ private:
 
 static void InferFromPattern(const CodeGenInstruction &Inst,
                              bool &MayStore, bool &MayLoad,
-                             bool &HasSideEffects,
+                             bool &HasSideEffects, bool &IsVariadic,
                              const CodeGenDAGPatterns &CDP) {
-  MayStore = MayLoad = HasSideEffects = false;
+  MayStore = MayLoad = HasSideEffects = IsVariadic = false;
 
   bool HadPattern =
-    InstAnalyzer(CDP, MayStore, MayLoad, HasSideEffects).Analyze(Inst.TheDef);
+    InstAnalyzer(CDP, MayStore, MayLoad, HasSideEffects, IsVariadic)
+    .Analyze(Inst.TheDef);
 
   // InstAnalyzer only correctly analyzes mayStore/mayLoad so far.
   if (Inst.mayStore) {  // If the .td file explicitly sets mayStore, use it.
@@ -2047,6 +2187,9 @@ static void InferFromPattern(const CodeGenInstruction &Inst,
               "which already inferred this.\n", Inst.TheDef->getName().c_str());
     HasSideEffects = true;
   }
+  
+  if (Inst.isVariadic)
+    IsVariadic = true;  // Can warn if we want.
 }
 
 /// ParseInstructions - Parse all of the instructions, inlining and resolving
@@ -2068,7 +2211,7 @@ void CodeGenDAGPatterns::ParseInstructions() {
       std::vector<Record*> Results;
       std::vector<Record*> Operands;
       
-      CodeGenInstruction &InstInfo =Target.getInstruction(Instrs[i]->getName());
+      CodeGenInstruction &InstInfo = Target.getInstruction(Instrs[i]);
 
       if (InstInfo.OperandList.size() != 0) {
         if (InstInfo.NumDefs == 0) {
@@ -2119,7 +2262,7 @@ void CodeGenDAGPatterns::ParseInstructions() {
     // fill in the InstResults map.
     for (unsigned j = 0, e = I->getNumTrees(); j != e; ++j) {
       TreePatternNode *Pat = I->getTree(j);
-      if (!Pat->hasTypeSet() || Pat->getType() != MVT::isVoid)
+      if (Pat->getNumTypes() != 0)
         I->error("Top-level forms in instruction pattern should have"
                  " void types");
 
@@ -2135,11 +2278,11 @@ void CodeGenDAGPatterns::ParseInstructions() {
 
     // Parse the operands list from the (ops) list, validating it.
     assert(I->getArgList().empty() && "Args list should still be empty here!");
-    CodeGenInstruction &CGI = Target.getInstruction(Instrs[i]->getName());
+    CodeGenInstruction &CGI = Target.getInstruction(Instrs[i]);
 
     // Check that all of the results occur first in the list.
     std::vector<Record*> Results;
-    TreePatternNode *Res0Node = NULL;
+    TreePatternNode *Res0Node = 0;
     for (unsigned i = 0; i != NumResults; ++i) {
       if (i == CGI.OperandList.size())
         I->error("'" + InstResults.begin()->first +
@@ -2217,7 +2360,7 @@ void CodeGenDAGPatterns::ParseInstructions() {
         OpNode->setTransformFn(0);
         std::vector<TreePatternNode*> Children;
         Children.push_back(OpNode);
-        OpNode = new TreePatternNode(Xform, Children);
+        OpNode = new TreePatternNode(Xform, Children, OpNode->getNumTypes());
       }
       
       ResultNodeOperands.push_back(OpNode);
@@ -2228,10 +2371,11 @@ void CodeGenDAGPatterns::ParseInstructions() {
                " occurs in pattern but not in operands list!");
 
     TreePatternNode *ResultPattern =
-      new TreePatternNode(I->getRecord(), ResultNodeOperands);
+      new TreePatternNode(I->getRecord(), ResultNodeOperands,
+                          GetNumNodeResults(I->getRecord(), *this));
     // Copy fully inferred output node type to instruction result pattern.
-    if (NumResults > 0)
-      ResultPattern->setType(Res0Node->getExtType());
+    for (unsigned i = 0; i != NumResults; ++i)
+      ResultPattern->setType(i, Res0Node->getExtType(i));
 
     // Create and insert the instruction.
     // FIXME: InstImpResults and InstImpInputs should not be part of
@@ -2292,7 +2436,7 @@ static void FindNames(const TreePatternNode *P,
     // If this is the first instance of the name, remember the node.
     if (Rec.second++ == 0)
       Rec.first = P;
-    else if (Rec.first->getType() != P->getType())
+    else if (Rec.first->getExtTypes() != P->getExtTypes())
       PatternTop->error("repetition of value: $" + P->getName() +
                         " where different uses have different types!");
   }
@@ -2347,17 +2491,19 @@ void CodeGenDAGPatterns::AddPatternToMatch(const TreePattern *Pattern,
 
 
 void CodeGenDAGPatterns::InferInstructionFlags() {
-  std::map<std::string, CodeGenInstruction> &InstrDescs =
-    Target.getInstructions();
-  for (std::map<std::string, CodeGenInstruction>::iterator
-         II = InstrDescs.begin(), E = InstrDescs.end(); II != E; ++II) {
-    CodeGenInstruction &InstInfo = II->second;
+  const std::vector<const CodeGenInstruction*> &Instructions =
+    Target.getInstructionsByEnumValue();
+  for (unsigned i = 0, e = Instructions.size(); i != e; ++i) {
+    CodeGenInstruction &InstInfo =
+      const_cast<CodeGenInstruction &>(*Instructions[i]);
     // Determine properties of the instruction from its pattern.
-    bool MayStore, MayLoad, HasSideEffects;
-    InferFromPattern(InstInfo, MayStore, MayLoad, HasSideEffects, *this);
+    bool MayStore, MayLoad, HasSideEffects, IsVariadic;
+    InferFromPattern(InstInfo, MayStore, MayLoad, HasSideEffects, IsVariadic,
+                     *this);
     InstInfo.mayStore = MayStore;
     InstInfo.mayLoad = MayLoad;
     InstInfo.hasSideEffects = HasSideEffects;
+    InstInfo.isVariadic = IsVariadic;
   }
 }
 
@@ -2378,23 +2524,29 @@ static bool ForceArbitraryInstResultType(TreePatternNode *N, TreePattern &TP) {
 
   // If this type is already concrete or completely unknown we can't do
   // anything.
-  if (N->getExtType().isCompletelyUnknown() || N->getExtType().isConcrete())
-    return false;
+  for (unsigned i = 0, e = N->getNumTypes(); i != e; ++i) {
+    if (N->getExtType(i).isCompletelyUnknown() || N->getExtType(i).isConcrete())
+      continue;
+  
+    // Otherwise, force its type to the first possibility (an arbitrary choice).
+    if (N->getExtType(i).MergeInTypeInfo(N->getExtType(i).getTypeList()[0], TP))
+      return true;
+  }
   
-  // Otherwise, force its type to the first possibility (an arbitrary choice).
-  return N->getExtType().MergeInTypeInfo(N->getExtType().getTypeList()[0], TP);
+  return false;
 }
 
 void CodeGenDAGPatterns::ParsePatterns() {
   std::vector<Record*> Patterns = Records.getAllDerivedDefinitions("Pattern");
 
   for (unsigned i = 0, e = Patterns.size(); i != e; ++i) {
-    DagInit *Tree = Patterns[i]->getValueAsDag("PatternToMatch");
+    Record *CurPattern = Patterns[i];
+    DagInit *Tree = CurPattern->getValueAsDag("PatternToMatch");
     DefInit *OpDef = dynamic_cast<DefInit*>(Tree->getOperator());
     Record *Operator = OpDef->getDef();
     TreePattern *Pattern;
     if (Operator->getName() != "parallel")
-      Pattern = new TreePattern(Patterns[i], Tree, true, *this);
+      Pattern = new TreePattern(CurPattern, Tree, true, *this);
     else {
       std::vector<Init*> Values;
       RecTy *ListTy = 0;
@@ -2419,17 +2571,17 @@ void CodeGenDAGPatterns::ParsePatterns() {
         }
       }
       ListInit *LI = new ListInit(Values, new ListRecTy(ListTy));
-      Pattern = new TreePattern(Patterns[i], LI, true, *this);
+      Pattern = new TreePattern(CurPattern, LI, true, *this);
     }
 
     // Inline pattern fragments into it.
     Pattern->InlinePatternFragments();
     
-    ListInit *LI = Patterns[i]->getValueAsListInit("ResultInstrs");
+    ListInit *LI = CurPattern->getValueAsListInit("ResultInstrs");
     if (LI->getSize() == 0) continue;  // no pattern.
     
     // Parse the instruction.
-    TreePattern *Result = new TreePattern(Patterns[i], LI, false, *this);
+    TreePattern *Result = new TreePattern(CurPattern, LI, false, *this);
     
     // Inline pattern fragments into it.
     Result->InlinePatternFragments();
@@ -2451,14 +2603,20 @@ void CodeGenDAGPatterns::ParsePatterns() {
       InferredAllResultTypes =
         Result->InferAllTypes(&Pattern->getNamedNodesMap());
 
+      IterateInference = false;
+      
       // Apply the type of the result to the source pattern.  This helps us
       // resolve cases where the input type is known to be a pointer type (which
       // is considered resolved), but the result knows it needs to be 32- or
       // 64-bits.  Infer the other way for good measure.
-      IterateInference = Pattern->getTree(0)->
-        UpdateNodeType(Result->getTree(0)->getExtType(), *Result);
-      IterateInference |= Result->getTree(0)->
-        UpdateNodeType(Pattern->getTree(0)->getExtType(), *Result);
+      for (unsigned i = 0, e = std::min(Result->getTree(0)->getNumTypes(),
+                                        Pattern->getTree(0)->getNumTypes());
+           i != e; ++i) {
+        IterateInference = Pattern->getTree(0)->
+          UpdateNodeType(i, Result->getTree(0)->getExtType(i), *Result);
+        IterateInference |= Result->getTree(0)->
+          UpdateNodeType(i, Pattern->getTree(0)->getExtType(i), *Result);
+      }
       
       // If our iteration has converged and the input pattern's types are fully
       // resolved but the result pattern is not fully resolved, we may have a
@@ -2473,7 +2631,6 @@ void CodeGenDAGPatterns::ParsePatterns() {
           !InferredAllResultTypes)
         IterateInference = ForceArbitraryInstResultType(Result->getTree(0),
                                                         *Result);
-      
     } while (IterateInference);
     
     // Verify that we inferred enough types that we can do something with the
@@ -2504,25 +2661,29 @@ void CodeGenDAGPatterns::ParsePatterns() {
         OpNode->setTransformFn(0);
         std::vector<TreePatternNode*> Children;
         Children.push_back(OpNode);
-        OpNode = new TreePatternNode(Xform, Children);
+        OpNode = new TreePatternNode(Xform, Children, OpNode->getNumTypes());
       }
       ResultNodeOperands.push_back(OpNode);
     }
     DstPattern = Result->getOnlyTree();
     if (!DstPattern->isLeaf())
       DstPattern = new TreePatternNode(DstPattern->getOperator(),
-                                       ResultNodeOperands);
-    DstPattern->setType(Result->getOnlyTree()->getExtType());
+                                       ResultNodeOperands,
+                                       DstPattern->getNumTypes());
+    
+    for (unsigned i = 0, e = Result->getOnlyTree()->getNumTypes(); i != e; ++i)
+      DstPattern->setType(i, Result->getOnlyTree()->getExtType(i));
+    
     TreePattern Temp(Result->getRecord(), DstPattern, false, *this);
     Temp.InferAllTypes();
 
     
     AddPatternToMatch(Pattern,
-                 PatternToMatch(Patterns[i]->getValueAsListInit("Predicates"),
-                                Pattern->getTree(0),
-                                Temp.getOnlyTree(), InstImpResults,
-                                Patterns[i]->getValueAsInt("AddedComplexity"),
-                                Patterns[i]->getID()));
+                    PatternToMatch(CurPattern->getValueAsListInit("Predicates"),
+                                   Pattern->getTree(0),
+                                   Temp.getOnlyTree(), InstImpResults,
+                                   CurPattern->getValueAsInt("AddedComplexity"),
+                                   CurPattern->getID()));
   }
 }
 
@@ -2556,13 +2717,15 @@ static void CombineChildVariants(TreePatternNode *Orig,
     std::vector<TreePatternNode*> NewChildren;
     for (unsigned i = 0, e = ChildVariants.size(); i != e; ++i)
       NewChildren.push_back(ChildVariants[i][Idxs[i]]);
-    TreePatternNode *R = new TreePatternNode(Orig->getOperator(), NewChildren);
+    TreePatternNode *R = new TreePatternNode(Orig->getOperator(), NewChildren,
+                                             Orig->getNumTypes());
     
     // Copy over properties.
     R->setName(Orig->getName());
     R->setPredicateFns(Orig->getPredicateFns());
     R->setTransformFn(Orig->getTransformFn());
-    R->setType(Orig->getExtType());
+    for (unsigned i = 0, e = Orig->getNumTypes(); i != e; ++i)
+      R->setType(i, Orig->getExtType(i));
     
     // If this pattern cannot match, do not include it as a variant.
     std::string ErrString;
diff --git a/utils/TableGen/CodeGenDAGPatterns.h b/utils/TableGen/CodeGenDAGPatterns.h
index 44f82fe..f583f29 100644
--- a/utils/TableGen/CodeGenDAGPatterns.h
+++ b/utils/TableGen/CodeGenDAGPatterns.h
@@ -15,14 +15,14 @@
 #ifndef CODEGEN_DAGPATTERNS_H
 #define CODEGEN_DAGPATTERNS_H
 
-#include <set>
-#include <algorithm>
-#include <vector>
-
 #include "CodeGenTarget.h"
 #include "CodeGenIntrinsics.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringMap.h"
+#include <set>
+#include <algorithm>
+#include <vector>
+#include <map>
 
 namespace llvm {
   class Record;
@@ -41,11 +41,6 @@ namespace llvm {
 /// arbitrary integer, floating-point, and vector types, so only an unknown
 /// value is needed.
 namespace EEVT {
-  enum DAGISelGenValueType {
-    // FIXME: Remove EEVT::isUnknown!
-    isUnknown  = MVT::LAST_VALUETYPE
-  };
-  
   /// TypeSet - This is either empty if it's completely unknown, or holds a set
   /// of types.  It is used during type inference because register classes can
   /// have multiple possible types and we don't know which one they get until
@@ -59,7 +54,7 @@ namespace EEVT {
   ///    Vector has one concrete type: The type is completely known.
   ///
   class TypeSet {
-    SmallVector<MVT::SimpleValueType, 2> TypeVec;
+    SmallVector<MVT::SimpleValueType, 4> TypeVec;
   public:
     TypeSet() {}
     TypeSet(MVT::SimpleValueType VT, TreePattern &TP);
@@ -88,6 +83,10 @@ namespace EEVT {
       return TypeVec;
     }
     
+    bool isVoid() const {
+      return TypeVec.size() == 1 && TypeVec[0] == MVT::isVoid;
+    }
+    
     /// hasIntegerTypes - Return true if this TypeSet contains any integer value
     /// types.
     bool hasIntegerTypes() const;
@@ -134,6 +133,14 @@ namespace EEVT {
     
     bool operator!=(const TypeSet &RHS) const { return TypeVec != RHS.TypeVec; }
     bool operator==(const TypeSet &RHS) const { return TypeVec == RHS.TypeVec; }
+    
+  private:
+    /// FillWithPossibleTypes - Set to all legal types and return true, only
+    /// valid on completely unknown type sets.  If Pred is non-null, only MVTs
+    /// that pass the predicate are added.
+    bool FillWithPossibleTypes(TreePattern &TP,
+                               bool (*Pred)(MVT::SimpleValueType) = 0,
+                               const char *PredicateName = 0);
   };
 }
 
@@ -175,11 +182,6 @@ struct SDTypeConstraint {
   /// exception.
   bool ApplyTypeConstraint(TreePatternNode *N, const SDNodeInfo &NodeInfo,
                            TreePattern &TP) const;
-  
-  /// getOperandNum - Return the node corresponding to operand #OpNo in tree
-  /// N, which has NumResults results.
-  TreePatternNode *getOperandNum(unsigned OpNo, TreePatternNode *N,
-                                 unsigned NumResults) const;
 };
 
 /// SDNodeInfo - One of these records is created for each SDNode instance in
@@ -208,8 +210,8 @@ public:
   
   /// getKnownType - If the type constraints on this node imply a fixed type
   /// (e.g. all stores return void, etc), then return it as an
-  /// MVT::SimpleValueType.  Otherwise, return EEVT::isUnknown.
-  unsigned getKnownType() const;
+  /// MVT::SimpleValueType.  Otherwise, return MVT::Other.
+  MVT::SimpleValueType getKnownType() const;
   
   /// hasProperty - Return true if this node has the specified property.
   ///
@@ -231,10 +233,10 @@ public:
 /// patterns), and as such should be ref counted.  We currently just leak all
 /// TreePatternNode objects!
 class TreePatternNode {
-  /// The type of this node.  Before and during type inference, this may be a
-  /// set of possible types.  After (successful) type inference, this is a
-  /// single type.
-  EEVT::TypeSet Type;
+  /// The type of each node result.  Before and during type inference, each
+  /// result may be a set of possible types.  After (successful) type inference,
+  /// each is a single concrete type.
+  SmallVector<EEVT::TypeSet, 1> Types;
   
   /// Operator - The Record for the operator if this is an interior node (not
   /// a leaf).
@@ -258,10 +260,14 @@ class TreePatternNode {
   
   std::vector<TreePatternNode*> Children;
 public:
-  TreePatternNode(Record *Op, const std::vector<TreePatternNode*> &Ch) 
-    : Operator(Op), Val(0), TransformFn(0), Children(Ch) { }
-  TreePatternNode(Init *val)    // leaf ctor
+  TreePatternNode(Record *Op, const std::vector<TreePatternNode*> &Ch,
+                  unsigned NumResults) 
+    : Operator(Op), Val(0), TransformFn(0), Children(Ch) {
+    Types.resize(NumResults);
+  }
+  TreePatternNode(Init *val, unsigned NumResults)    // leaf ctor
     : Operator(0), Val(val), TransformFn(0) {
+    Types.resize(NumResults);
   }
   ~TreePatternNode();
   
@@ -271,14 +277,24 @@ public:
   bool isLeaf() const { return Val != 0; }
   
   // Type accessors.
-  MVT::SimpleValueType getType() const { return Type.getConcrete(); }
-  const EEVT::TypeSet &getExtType() const { return Type; }
-  EEVT::TypeSet &getExtType() { return Type; }
-  void setType(const EEVT::TypeSet &T) { Type = T; }
+  unsigned getNumTypes() const { return Types.size(); }
+  MVT::SimpleValueType getType(unsigned ResNo) const {
+    return Types[ResNo].getConcrete();
+  }
+  const SmallVectorImpl<EEVT::TypeSet> &getExtTypes() const { return Types; }
+  const EEVT::TypeSet &getExtType(unsigned ResNo) const { return Types[ResNo]; }
+  EEVT::TypeSet &getExtType(unsigned ResNo) { return Types[ResNo]; }
+  void setType(unsigned ResNo, const EEVT::TypeSet &T) { Types[ResNo] = T; }
   
-  bool hasTypeSet() const { return Type.isConcrete(); }
-  bool isTypeCompletelyUnknown() const { return Type.isCompletelyUnknown(); }
-  bool isTypeDynamicallyResolved() const { return Type.isDynamicallyResolved();}
+  bool hasTypeSet(unsigned ResNo) const {
+    return Types[ResNo].isConcrete();
+  }
+  bool isTypeCompletelyUnknown(unsigned ResNo) const {
+    return Types[ResNo].isCompletelyUnknown();
+  }
+  bool isTypeDynamicallyResolved(unsigned ResNo) const {
+    return Types[ResNo].isDynamicallyResolved();
+  }
   
   Init *getLeafValue() const { assert(isLeaf()); return Val; }
   Record *getOperator() const { assert(!isLeaf()); return Operator; }
@@ -371,18 +387,22 @@ public:   // Higher level manipulation routines.
   /// information.  If N already contains a conflicting type, then throw an
   /// exception.  This returns true if any information was updated.
   ///
-  bool UpdateNodeType(const EEVT::TypeSet &InTy, TreePattern &TP) {
-    return Type.MergeInTypeInfo(InTy, TP);
+  bool UpdateNodeType(unsigned ResNo, const EEVT::TypeSet &InTy,
+                      TreePattern &TP) {
+    return Types[ResNo].MergeInTypeInfo(InTy, TP);
   }
 
-  bool UpdateNodeType(MVT::SimpleValueType InTy, TreePattern &TP) {
-    return Type.MergeInTypeInfo(EEVT::TypeSet(InTy, TP), TP);
+  bool UpdateNodeType(unsigned ResNo, MVT::SimpleValueType InTy,
+                      TreePattern &TP) {
+    return Types[ResNo].MergeInTypeInfo(EEVT::TypeSet(InTy, TP), TP);
   }
   
   /// ContainsUnresolvedType - Return true if this tree contains any
   /// unresolved types.
   bool ContainsUnresolvedType() const {
-    if (!hasTypeSet()) return true;
+    for (unsigned i = 0, e = Types.size(); i != e; ++i)
+      if (!Types[i].isConcrete()) return true;
+    
     for (unsigned i = 0, e = getNumChildren(); i != e; ++i)
       if (getChild(i)->ContainsUnresolvedType()) return true;
     return false;
@@ -672,6 +692,11 @@ public:
     assert(PatternFragments.count(R) && "Invalid pattern fragment request!");
     return PatternFragments.find(R)->second;
   }
+  TreePattern *getPatternFragmentIfRead(Record *R) const {
+    if (!PatternFragments.count(R)) return 0;
+    return PatternFragments.find(R)->second;
+  }
+  
   typedef std::map<Record*, TreePattern*, RecordPtrCmp>::const_iterator
           pf_iterator;
   pf_iterator pf_begin() const { return PatternFragments.begin(); }
diff --git a/utils/TableGen/CodeGenInstruction.cpp b/utils/TableGen/CodeGenInstruction.cpp
index f5b52ec..eea5561 100644
--- a/utils/TableGen/CodeGenInstruction.cpp
+++ b/utils/TableGen/CodeGenInstruction.cpp
@@ -123,36 +123,43 @@ CodeGenInstruction::CodeGenInstruction(Record *R, const std::string &AsmStr)
   hasExtraDefRegAllocReq = R->getValueAsBit("hasExtraDefRegAllocReq");
   hasOptionalDef = false;
   isVariadic = false;
+  ImplicitDefs = R->getValueAsListOfDefs("Defs");
+  ImplicitUses = R->getValueAsListOfDefs("Uses");
 
   if (neverHasSideEffects + hasSideEffects > 1)
     throw R->getName() + ": multiple conflicting side-effect flags set!";
 
-  DagInit *DI;
-  try {
-    DI = R->getValueAsDag("OutOperandList");
-  } catch (...) {
-    // Error getting operand list, just ignore it (sparcv9).
-    AsmString.clear();
-    OperandList.clear();
-    return;
-  }
-  NumDefs = DI->getNumArgs();
-
-  DagInit *IDI;
-  try {
-    IDI = R->getValueAsDag("InOperandList");
-  } catch (...) {
-    // Error getting operand list, just ignore it (sparcv9).
-    AsmString.clear();
-    OperandList.clear();
-    return;
-  }
-  DI = (DagInit*)(new BinOpInit(BinOpInit::CONCAT, DI, IDI, new DagRecTy))->Fold(R, 0);
-
+  DagInit *OutDI = R->getValueAsDag("OutOperandList");
+
+  if (DefInit *Init = dynamic_cast<DefInit*>(OutDI->getOperator())) {
+    if (Init->getDef()->getName() != "outs")
+      throw R->getName() + ": invalid def name for output list: use 'outs'";
+  } else
+    throw R->getName() + ": invalid output list: use 'outs'";
+    
+  NumDefs = OutDI->getNumArgs();
+    
+  DagInit *InDI = R->getValueAsDag("InOperandList");
+  if (DefInit *Init = dynamic_cast<DefInit*>(InDI->getOperator())) {
+    if (Init->getDef()->getName() != "ins")
+      throw R->getName() + ": invalid def name for input list: use 'ins'";
+  } else
+    throw R->getName() + ": invalid input list: use 'ins'";
+    
   unsigned MIOperandNo = 0;
   std::set<std::string> OperandNames;
-  for (unsigned i = 0, e = DI->getNumArgs(); i != e; ++i) {
-    DefInit *Arg = dynamic_cast<DefInit*>(DI->getArg(i));
+  for (unsigned i = 0, e = InDI->getNumArgs()+OutDI->getNumArgs(); i != e; ++i){
+    Init *ArgInit;
+    std::string ArgName;
+    if (i < NumDefs) {
+      ArgInit = OutDI->getArg(i);
+      ArgName = OutDI->getArgName(i);
+    } else {
+      ArgInit = InDI->getArg(i-NumDefs);
+      ArgName = InDI->getArgName(i-NumDefs);
+    }
+    
+    DefInit *Arg = dynamic_cast<DefInit*>(ArgInit);
     if (!Arg)
       throw "Illegal operand for the '" + R->getName() + "' instruction!";
 
@@ -189,14 +196,14 @@ CodeGenInstruction::CodeGenInstruction(Record *R, const std::string &AsmStr)
             "' in '" + R->getName() + "' instruction!";
 
     // Check that the operand has a name and that it's unique.
-    if (DI->getArgName(i).empty())
+    if (ArgName.empty())
       throw "In instruction '" + R->getName() + "', operand #" + utostr(i) +
         " has no name!";
-    if (!OperandNames.insert(DI->getArgName(i)).second)
+    if (!OperandNames.insert(ArgName).second)
       throw "In instruction '" + R->getName() + "', operand #" + utostr(i) +
         " has the same name as a previous operand!";
 
-    OperandList.push_back(OperandInfo(Rec, DI->getArgName(i), PrintMethod,
+    OperandList.push_back(OperandInfo(Rec, ArgName, PrintMethod,
                                       MIOperandNo, NumOps, MIOpInfo));
     MIOperandNo += NumOps;
   }
diff --git a/utils/TableGen/CodeGenInstruction.h b/utils/TableGen/CodeGenInstruction.h
index aae2cac..c369123 100644
--- a/utils/TableGen/CodeGenInstruction.h
+++ b/utils/TableGen/CodeGenInstruction.h
@@ -105,7 +105,8 @@ namespace llvm {
           MINumOperands(MINO), MIOperandInfo(MIOI) {}
     };
 
-    /// NumDefs - Number of def operands declared.
+    /// NumDefs - Number of def operands declared, this is the number of
+    /// elements in the instruction's (outs) list.
     ///
     unsigned NumDefs;
 
@@ -113,6 +114,10 @@ namespace llvm {
     /// type (which is a record).
     std::vector<OperandInfo> OperandList;
 
+    /// ImplicitDefs/ImplicitUses - These are lists of registers that are
+    /// implicitly defined and used by the instruction.
+    std::vector<Record*> ImplicitDefs, ImplicitUses;
+
     // Various boolean values we track for the instruction.
     bool isReturn;
     bool isBranch;
diff --git a/utils/TableGen/CodeGenTarget.cpp b/utils/TableGen/CodeGenTarget.cpp
index ec6a31f..79bc30d 100644
--- a/utils/TableGen/CodeGenTarget.cpp
+++ b/utils/TableGen/CodeGenTarget.cpp
@@ -120,24 +120,21 @@ const std::string &CodeGenTarget::getName() const {
 }
 
 std::string CodeGenTarget::getInstNamespace() const {
-  std::string InstNS;
-
   for (inst_iterator i = inst_begin(), e = inst_end(); i != e; ++i) {
-    InstNS = i->second.Namespace;
-
-    // Make sure not to pick up "TargetInstrInfo" by accidentally getting
+    // Make sure not to pick up "TargetOpcode" by accidentally getting
     // the namespace off the PHI instruction or something.
-    if (InstNS != "TargetInstrInfo")
-      break;
+    if ((*i)->Namespace != "TargetOpcode")
+      return (*i)->Namespace;
   }
 
-  return InstNS;
+  return "";
 }
 
 Record *CodeGenTarget::getInstructionSet() const {
   return TargetRec->getValueAsDef("InstructionSet");
 }
 
+
 /// getAsmParser - Return the AssemblyParser definition for this target.
 ///
 Record *CodeGenTarget::getAsmParser() const {
@@ -277,98 +274,92 @@ void CodeGenTarget::ReadInstructions() const {
 
   for (unsigned i = 0, e = Insts.size(); i != e; ++i) {
     std::string AsmStr = Insts[i]->getValueAsString(InstFormatName);
-    Instructions.insert(std::make_pair(Insts[i]->getName(),
-                                       CodeGenInstruction(Insts[i], AsmStr)));
+    Instructions[Insts[i]] = new CodeGenInstruction(Insts[i], AsmStr);
   }
 }
 
-/// getInstructionsByEnumValue - Return all of the instructions defined by the
-/// target, ordered by their enum value.
-void CodeGenTarget::
-getInstructionsByEnumValue(std::vector<const CodeGenInstruction*>
-                                                 &NumberedInstructions) {
-  std::map<std::string, CodeGenInstruction>::const_iterator I;
-  I = getInstructions().find("PHI");
-  if (I == Instructions.end()) throw "Could not find 'PHI' instruction!";
-  const CodeGenInstruction *PHI = &I->second;
-  
-  I = getInstructions().find("INLINEASM");
-  if (I == Instructions.end()) throw "Could not find 'INLINEASM' instruction!";
-  const CodeGenInstruction *INLINEASM = &I->second;
-  
-  I = getInstructions().find("DBG_LABEL");
-  if (I == Instructions.end()) throw "Could not find 'DBG_LABEL' instruction!";
-  const CodeGenInstruction *DBG_LABEL = &I->second;
-  
-  I = getInstructions().find("EH_LABEL");
-  if (I == Instructions.end()) throw "Could not find 'EH_LABEL' instruction!";
-  const CodeGenInstruction *EH_LABEL = &I->second;
-  
-  I = getInstructions().find("GC_LABEL");
-  if (I == Instructions.end()) throw "Could not find 'GC_LABEL' instruction!";
-  const CodeGenInstruction *GC_LABEL = &I->second;
-  
-  I = getInstructions().find("KILL");
-  if (I == Instructions.end()) throw "Could not find 'KILL' instruction!";
-  const CodeGenInstruction *KILL = &I->second;
-  
-  I = getInstructions().find("EXTRACT_SUBREG");
-  if (I == Instructions.end()) 
-    throw "Could not find 'EXTRACT_SUBREG' instruction!";
-  const CodeGenInstruction *EXTRACT_SUBREG = &I->second;
-  
-  I = getInstructions().find("INSERT_SUBREG");
-  if (I == Instructions.end()) 
-    throw "Could not find 'INSERT_SUBREG' instruction!";
-  const CodeGenInstruction *INSERT_SUBREG = &I->second;
+static const CodeGenInstruction *
+GetInstByName(const char *Name,
+              const DenseMap<const Record*, CodeGenInstruction*> &Insts) {
+  const Record *Rec = Records.getDef(Name);
   
-  I = getInstructions().find("IMPLICIT_DEF");
-  if (I == Instructions.end())
-    throw "Could not find 'IMPLICIT_DEF' instruction!";
-  const CodeGenInstruction *IMPLICIT_DEF = &I->second;
-  
-  I = getInstructions().find("SUBREG_TO_REG");
-  if (I == Instructions.end())
-    throw "Could not find 'SUBREG_TO_REG' instruction!";
-  const CodeGenInstruction *SUBREG_TO_REG = &I->second;
+  DenseMap<const Record*, CodeGenInstruction*>::const_iterator
+    I = Insts.find(Rec);
+  if (Rec == 0 || I == Insts.end())
+    throw std::string("Could not find '") + Name + "' instruction!";
+  return I->second;
+}
 
-  I = getInstructions().find("COPY_TO_REGCLASS");
-  if (I == Instructions.end())
-    throw "Could not find 'COPY_TO_REGCLASS' instruction!";
-  const CodeGenInstruction *COPY_TO_REGCLASS = &I->second;
+namespace {
+/// SortInstByName - Sorting predicate to sort instructions by name.
+///
+struct SortInstByName {
+  bool operator()(const CodeGenInstruction *Rec1,
+                  const CodeGenInstruction *Rec2) const {
+    return Rec1->TheDef->getName() < Rec2->TheDef->getName();
+  }
+};
+}
 
-  I = getInstructions().find("DBG_VALUE");
-  if (I == Instructions.end())
-    throw "Could not find 'DBG_VALUE' instruction!";
-  const CodeGenInstruction *DBG_VALUE = &I->second;
+/// getInstructionsByEnumValue - Return all of the instructions defined by the
+/// target, ordered by their enum value.
+void CodeGenTarget::ComputeInstrsByEnum() const {
+  const DenseMap<const Record*, CodeGenInstruction*> &Insts = getInstructions();
+  const CodeGenInstruction *PHI = GetInstByName("PHI", Insts);
+  const CodeGenInstruction *INLINEASM = GetInstByName("INLINEASM", Insts);
+  const CodeGenInstruction *DBG_LABEL = GetInstByName("DBG_LABEL", Insts);
+  const CodeGenInstruction *EH_LABEL = GetInstByName("EH_LABEL", Insts);
+  const CodeGenInstruction *GC_LABEL = GetInstByName("GC_LABEL", Insts);
+  const CodeGenInstruction *KILL = GetInstByName("KILL", Insts);
+  const CodeGenInstruction *EXTRACT_SUBREG =
+    GetInstByName("EXTRACT_SUBREG", Insts);
+  const CodeGenInstruction *INSERT_SUBREG =
+    GetInstByName("INSERT_SUBREG", Insts);
+  const CodeGenInstruction *IMPLICIT_DEF = GetInstByName("IMPLICIT_DEF", Insts);
+  const CodeGenInstruction *SUBREG_TO_REG =
+    GetInstByName("SUBREG_TO_REG", Insts);
+  const CodeGenInstruction *COPY_TO_REGCLASS =
+    GetInstByName("COPY_TO_REGCLASS", Insts);
+  const CodeGenInstruction *DBG_VALUE = GetInstByName("DBG_VALUE", Insts);
 
   // Print out the rest of the instructions now.
-  NumberedInstructions.push_back(PHI);
-  NumberedInstructions.push_back(INLINEASM);
-  NumberedInstructions.push_back(DBG_LABEL);
-  NumberedInstructions.push_back(EH_LABEL);
-  NumberedInstructions.push_back(GC_LABEL);
-  NumberedInstructions.push_back(KILL);
-  NumberedInstructions.push_back(EXTRACT_SUBREG);
-  NumberedInstructions.push_back(INSERT_SUBREG);
-  NumberedInstructions.push_back(IMPLICIT_DEF);
-  NumberedInstructions.push_back(SUBREG_TO_REG);
-  NumberedInstructions.push_back(COPY_TO_REGCLASS);
-  NumberedInstructions.push_back(DBG_VALUE);
-  for (inst_iterator II = inst_begin(), E = inst_end(); II != E; ++II)
-    if (&II->second != PHI &&
-        &II->second != INLINEASM &&
-        &II->second != DBG_LABEL &&
-        &II->second != EH_LABEL &&
-        &II->second != GC_LABEL &&
-        &II->second != KILL &&
-        &II->second != EXTRACT_SUBREG &&
-        &II->second != INSERT_SUBREG &&
-        &II->second != IMPLICIT_DEF &&
-        &II->second != SUBREG_TO_REG &&
-        &II->second != COPY_TO_REGCLASS &&
-        &II->second != DBG_VALUE)
-      NumberedInstructions.push_back(&II->second);
+  InstrsByEnum.push_back(PHI);
+  InstrsByEnum.push_back(INLINEASM);
+  InstrsByEnum.push_back(DBG_LABEL);
+  InstrsByEnum.push_back(EH_LABEL);
+  InstrsByEnum.push_back(GC_LABEL);
+  InstrsByEnum.push_back(KILL);
+  InstrsByEnum.push_back(EXTRACT_SUBREG);
+  InstrsByEnum.push_back(INSERT_SUBREG);
+  InstrsByEnum.push_back(IMPLICIT_DEF);
+  InstrsByEnum.push_back(SUBREG_TO_REG);
+  InstrsByEnum.push_back(COPY_TO_REGCLASS);
+  InstrsByEnum.push_back(DBG_VALUE);
+  
+  unsigned EndOfPredefines = InstrsByEnum.size();
+  
+  for (DenseMap<const Record*, CodeGenInstruction*>::const_iterator
+       I = Insts.begin(), E = Insts.end(); I != E; ++I) {
+    const CodeGenInstruction *CGI = I->second;
+    if (CGI != PHI &&
+        CGI != INLINEASM &&
+        CGI != DBG_LABEL &&
+        CGI != EH_LABEL &&
+        CGI != GC_LABEL &&
+        CGI != KILL &&
+        CGI != EXTRACT_SUBREG &&
+        CGI != INSERT_SUBREG &&
+        CGI != IMPLICIT_DEF &&
+        CGI != SUBREG_TO_REG &&
+        CGI != COPY_TO_REGCLASS &&
+        CGI != DBG_VALUE)
+      InstrsByEnum.push_back(CGI);
+  }
+  
+  // All of the instructions are now in random order based on the map iteration.
+  // Sort them by name.
+  std::sort(InstrsByEnum.begin()+EndOfPredefines, InstrsByEnum.end(),
+            SortInstByName());
 }
 
 
@@ -404,6 +395,8 @@ ComplexPattern::ComplexPattern(Record *R) {
       Properties |= 1 << SDNPSideEffect;
     } else if (PropList[i]->getName() == "SDNPMemOperand") {
       Properties |= 1 << SDNPMemOperand;
+    } else if (PropList[i]->getName() == "SDNPVariadic") {
+      Properties |= 1 << SDNPVariadic;
     } else {
       errs() << "Unsupported SD Node property '" << PropList[i]->getName()
              << "' on ComplexPattern '" << R->getName() << "'!\n";
diff --git a/utils/TableGen/CodeGenTarget.h b/utils/TableGen/CodeGenTarget.h
index 1df74af..2926418 100644
--- a/utils/TableGen/CodeGenTarget.h
+++ b/utils/TableGen/CodeGenTarget.h
@@ -17,11 +17,11 @@
 #ifndef CODEGEN_TARGET_H
 #define CODEGEN_TARGET_H
 
-#include "llvm/Support/raw_ostream.h"
 #include "CodeGenRegisters.h"
 #include "CodeGenInstruction.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/DenseMap.h"
 #include <algorithm>
-#include <map>
 
 namespace llvm {
 
@@ -43,7 +43,8 @@ enum SDNP {
   SDNPMayLoad,
   SDNPMayStore,
   SDNPSideEffect,
-  SDNPMemOperand
+  SDNPMemOperand,
+  SDNPVariadic
 };
 
 /// getValueType - Return the MVT::SimpleValueType that the specified TableGen
@@ -62,7 +63,7 @@ std::string getQualifiedName(const Record *R);
 class CodeGenTarget {
   Record *TargetRec;
 
-  mutable std::map<std::string, CodeGenInstruction> Instructions;
+  mutable DenseMap<const Record*, CodeGenInstruction*> Instructions;
   mutable std::vector<CodeGenRegister> Registers;
   mutable std::vector<CodeGenRegisterClass> RegisterClasses;
   mutable std::vector<MVT::SimpleValueType> LegalValueTypes;
@@ -70,6 +71,8 @@ class CodeGenTarget {
   void ReadRegisterClasses() const;
   void ReadInstructions() const;
   void ReadLegalValueTypes() const;
+  
+  mutable std::vector<const CodeGenInstruction*> InstrsByEnum;
 public:
   CodeGenTarget();
 
@@ -183,37 +186,40 @@ public:
     return false;    
   }
 
-  /// getInstructions - Return all of the instructions defined for this target.
-  ///
-  const std::map<std::string, CodeGenInstruction> &getInstructions() const {
+private:
+  DenseMap<const Record*, CodeGenInstruction*> &getInstructions() const {
     if (Instructions.empty()) ReadInstructions();
     return Instructions;
   }
-  std::map<std::string, CodeGenInstruction> &getInstructions() {
+public:
+  
+  CodeGenInstruction &getInstruction(const Record *InstRec) const {
     if (Instructions.empty()) ReadInstructions();
-    return Instructions;
+    DenseMap<const Record*, CodeGenInstruction*>::iterator I =
+      Instructions.find(InstRec);
+    assert(I != Instructions.end() && "Not an instruction");
+    return *I->second;
   }
 
-  CodeGenInstruction &getInstruction(const std::string &Name) const {
-    const std::map<std::string, CodeGenInstruction> &Insts = getInstructions();
-    assert(Insts.count(Name) && "Not an instruction!");
-    return const_cast<CodeGenInstruction&>(Insts.find(Name)->second);
-  }
-
-  typedef std::map<std::string,
-                   CodeGenInstruction>::const_iterator inst_iterator;
-  inst_iterator inst_begin() const { return getInstructions().begin(); }
-  inst_iterator inst_end() const { return Instructions.end(); }
-
   /// getInstructionsByEnumValue - Return all of the instructions defined by the
   /// target, ordered by their enum value.
-  void getInstructionsByEnumValue(std::vector<const CodeGenInstruction*>
-                                                &NumberedInstructions);
-
+  const std::vector<const CodeGenInstruction*> &
+  getInstructionsByEnumValue() const {
+    if (InstrsByEnum.empty()) ComputeInstrsByEnum();
+    return InstrsByEnum;
+  }
 
+  typedef std::vector<const CodeGenInstruction*>::const_iterator inst_iterator;
+  inst_iterator inst_begin() const{return getInstructionsByEnumValue().begin();}
+  inst_iterator inst_end() const { return getInstructionsByEnumValue().end(); }
+  
+  
   /// isLittleEndianEncoding - are instruction bit patterns defined as  [0..n]?
   ///
   bool isLittleEndianEncoding() const;
+  
+private:
+  void ComputeInstrsByEnum() const;
 };
 
 /// ComplexPattern - ComplexPattern info, corresponding to the ComplexPattern
diff --git a/utils/TableGen/DAGISelEmitter.cpp b/utils/TableGen/DAGISelEmitter.cpp
index 73feac1..044086a 100644
--- a/utils/TableGen/DAGISelEmitter.cpp
+++ b/utils/TableGen/DAGISelEmitter.cpp
@@ -25,7 +25,6 @@ using namespace llvm;
 /// patterns before small ones.  This is used to determine the size of a
 /// pattern.
 static unsigned getPatternSize(TreePatternNode *P, CodeGenDAGPatterns &CGP) {
-  assert(P->hasTypeSet() && "Not a valid pattern node to size!");
   unsigned Size = 3;  // The node itself.
   // If the root node is a ConstantSDNode, increases its size.
   // e.g. (set R32:$dst, 0).
@@ -49,7 +48,8 @@ static unsigned getPatternSize(TreePatternNode *P, CodeGenDAGPatterns &CGP) {
   // Count children in the count if they are also nodes.
   for (unsigned i = 0, e = P->getNumChildren(); i != e; ++i) {
     TreePatternNode *Child = P->getChild(i);
-    if (!Child->isLeaf() && Child->getType() != MVT::Other)
+    if (!Child->isLeaf() && Child->getNumTypes() &&
+        Child->getType(0) != MVT::Other)
       Size += getPatternSize(Child, CGP);
     else if (Child->isLeaf()) {
       if (dynamic_cast<IntInit*>(Child->getLeafValue())) 
@@ -75,7 +75,7 @@ static unsigned getResultPatternCost(TreePatternNode *P,
   Record *Op = P->getOperator();
   if (Op->isSubClassOf("Instruction")) {
     Cost++;
-    CodeGenInstruction &II = CGP.getTargetInfo().getInstruction(Op->getName());
+    CodeGenInstruction &II = CGP.getTargetInfo().getInstruction(Op);
     if (II.usesCustomInserter)
       Cost += 10;
   }
diff --git a/utils/TableGen/DAGISelMatcher.cpp b/utils/TableGen/DAGISelMatcher.cpp
index 22d2fe8..cd3fad1 100644
--- a/utils/TableGen/DAGISelMatcher.cpp
+++ b/utils/TableGen/DAGISelMatcher.cpp
@@ -357,14 +357,13 @@ bool CheckOpcodeMatcher::isContradictoryImpl(const Matcher *M) const {
   // ISD::STORE will never be true at the same time a check for Type i32 is.
   if (const CheckTypeMatcher *CT = dyn_cast<CheckTypeMatcher>(M)) {
     // FIXME: What result is this referring to?
-    unsigned NodeType;
+    MVT::SimpleValueType NodeType;
     if (getOpcode().getNumResults() == 0)
       NodeType = MVT::isVoid;
     else
       NodeType = getOpcode().getKnownType();
-    if (NodeType != EEVT::isUnknown)
-      return TypesAreContradictory((MVT::SimpleValueType)NodeType,
-                                   CT->getType());
+    if (NodeType != MVT::Other)
+      return TypesAreContradictory(NodeType, CT->getType());
   }
   
   return false;
diff --git a/utils/TableGen/DAGISelMatcherGen.cpp b/utils/TableGen/DAGISelMatcherGen.cpp
index 375df6b..da6f6af 100644
--- a/utils/TableGen/DAGISelMatcherGen.cpp
+++ b/utils/TableGen/DAGISelMatcherGen.cpp
@@ -409,8 +409,10 @@ void MatcherGen::EmitMatchCode(const TreePatternNode *N,
   // need to do a type check.  Emit the check, apply the tyep to NodeNoTypes and
   // reinfer any correlated types.
   bool DoTypeCheck = false;
-  if (NodeNoTypes->getExtType() != N->getExtType()) {
-    NodeNoTypes->setType(N->getExtType());
+  if (NodeNoTypes->getNumTypes() != 0 &&
+      NodeNoTypes->getExtType(0) != N->getExtType(0)) {
+    assert(NodeNoTypes->getNumTypes() == 1 && "FIXME: Handle multiple results");
+    NodeNoTypes->setType(0, N->getExtType(0));
     InferPossibleTypes();
     DoTypeCheck = true;
   }
@@ -442,8 +444,10 @@ void MatcherGen::EmitMatchCode(const TreePatternNode *N,
   for (unsigned i = 0, e = N->getPredicateFns().size(); i != e; ++i)
     AddMatcher(new CheckPredicateMatcher(N->getPredicateFns()[i]));
   
-  if (DoTypeCheck)
-    AddMatcher(new CheckTypeMatcher(N->getType()));
+  if (DoTypeCheck) {
+    assert(N->getNumTypes() == 1);
+    AddMatcher(new CheckTypeMatcher(N->getType(0)));
+  }
 }
 
 /// EmitMatcherCode - Generate the code that matches the predicate of this
@@ -567,7 +571,7 @@ void MatcherGen::EmitResultLeafAsOperand(const TreePatternNode *N,
   assert(N->isLeaf() && "Must be a leaf");
   
   if (IntInit *II = dynamic_cast<IntInit*>(N->getLeafValue())) {
-    AddMatcher(new EmitIntegerMatcher(II->getValue(), N->getType()));
+    AddMatcher(new EmitIntegerMatcher(II->getValue(), N->getType(0)));
     ResultOps.push_back(NextRecordedOperandNo++);
     return;
   }
@@ -575,13 +579,13 @@ void MatcherGen::EmitResultLeafAsOperand(const TreePatternNode *N,
   // If this is an explicit register reference, handle it.
   if (DefInit *DI = dynamic_cast<DefInit*>(N->getLeafValue())) {
     if (DI->getDef()->isSubClassOf("Register")) {
-      AddMatcher(new EmitRegisterMatcher(DI->getDef(), N->getType()));
+      AddMatcher(new EmitRegisterMatcher(DI->getDef(), N->getType(0)));
       ResultOps.push_back(NextRecordedOperandNo++);
       return;
     }
     
     if (DI->getDef()->getName() == "zero_reg") {
-      AddMatcher(new EmitRegisterMatcher(0, N->getType()));
+      AddMatcher(new EmitRegisterMatcher(0, N->getType(0)));
       ResultOps.push_back(NextRecordedOperandNo++);
       return;
     }
@@ -627,7 +631,7 @@ EmitResultInstructionAsOperand(const TreePatternNode *N,
                                SmallVectorImpl<unsigned> &OutputOps) {
   Record *Op = N->getOperator();
   const CodeGenTarget &CGT = CGP.getTargetInfo();
-  CodeGenInstruction &II = CGT.getInstruction(Op->getName());
+  CodeGenInstruction &II = CGT.getInstruction(Op);
   const DAGInstruction &Inst = CGP.getInstruction(Op);
   
   // If we can, get the pattern for the instruction we're generating.  We derive
@@ -698,7 +702,7 @@ EmitResultInstructionAsOperand(const TreePatternNode *N,
     // occur in patterns like (mul:i8 AL:i8, GR8:i8:$src).
     for (unsigned i = 0, e = PhysRegInputs.size(); i != e; ++i)
       AddMatcher(new EmitCopyToRegMatcher(PhysRegInputs[i].second,
-                                                  PhysRegInputs[i].first));
+                                          PhysRegInputs[i].first));
     // Even if the node has no other flag inputs, the resultant node must be
     // flagged to the CopyFromReg nodes we just generated.
     TreeHasInFlag = true;
@@ -708,12 +712,12 @@ EmitResultInstructionAsOperand(const TreePatternNode *N,
   
   // Determine the result types.
   SmallVector<MVT::SimpleValueType, 4> ResultVTs;
-  if (NumResults != 0 && N->getType() != MVT::isVoid) {
+  if (N->getNumTypes()) {
     // FIXME2: If the node has multiple results, we should add them.  For now,
     // preserve existing behavior?!
-    ResultVTs.push_back(N->getType());
+    assert(N->getNumTypes() == 1);
+    ResultVTs.push_back(N->getType(0));
   }
-
   
   // If this is the root instruction of a pattern that has physical registers in
   // its result pattern, add output VTs for them.  For example, X86 has:
@@ -721,16 +725,26 @@ EmitResultInstructionAsOperand(const TreePatternNode *N,
   // This also handles implicit results like:
   //   (implicit EFLAGS)
   if (isRoot && Pattern.getDstRegs().size() != 0) {
-    for (unsigned i = 0; i != Pattern.getDstRegs().size(); ++i)
-      if (Pattern.getDstRegs()[i]->isSubClassOf("Register"))
-        ResultVTs.push_back(getRegisterValueType(Pattern.getDstRegs()[i], CGT));
+    // If the root came from an implicit def in the instruction handling stuff,
+    // don't re-add it.
+    Record *HandledReg = 0;
+    if (NumResults == 0 && N->getNumTypes() != 0 &&
+        !II.ImplicitDefs.empty())
+      HandledReg = II.ImplicitDefs[0];
+    
+    for (unsigned i = 0; i != Pattern.getDstRegs().size(); ++i) {
+      Record *Reg = Pattern.getDstRegs()[i];
+      if (!Reg->isSubClassOf("Register") || Reg == HandledReg) continue;
+      ResultVTs.push_back(getRegisterValueType(Reg, CGT));
+    }
   }
 
-  // FIXME2: Instead of using the isVariadic flag on the instruction, we should
-  // have an SDNP that indicates variadicism.  The TargetInstrInfo isVariadic
-  // property should be inferred from this when an instruction has a pattern.
+  // If this is the root of the pattern and the pattern we're matching includes
+  // a node that is variadic, mark the generated node as variadic so that it
+  // gets the excess operands from the input DAG.
   int NumFixedArityOperands = -1;
-  if (isRoot && II.isVariadic)
+  if (isRoot &&
+      (Pattern.getSrcPattern()->NodeHasProperty(SDNPVariadic, CGP)))
     NumFixedArityOperands = Pattern.getSrcPattern()->getNumChildren();
   
   // If this is the root node and any of the nodes matched nodes in the input
diff --git a/utils/TableGen/DisassemblerEmitter.cpp b/utils/TableGen/DisassemblerEmitter.cpp
index 61b9b15..a195c0b 100644
--- a/utils/TableGen/DisassemblerEmitter.cpp
+++ b/utils/TableGen/DisassemblerEmitter.cpp
@@ -108,8 +108,8 @@ void DisassemblerEmitter::run(raw_ostream &OS) {
   if (Target.getName() == "X86") {
     DisassemblerTables Tables;
   
-    std::vector<const CodeGenInstruction*> numberedInstructions;
-    Target.getInstructionsByEnumValue(numberedInstructions);
+    const std::vector<const CodeGenInstruction*> &numberedInstructions =
+      Target.getInstructionsByEnumValue();
     
     for (unsigned i = 0, e = numberedInstructions.size(); i != e; ++i)
       RecognizableInstr::processInstr(Tables, *numberedInstructions[i], i);
diff --git a/utils/TableGen/EDEmitter.cpp b/utils/TableGen/EDEmitter.cpp
index 2149410..f83ab48 100644
--- a/utils/TableGen/EDEmitter.cpp
+++ b/utils/TableGen/EDEmitter.cpp
@@ -549,8 +549,8 @@ static void X86ExtractSemantics(FlagsConstantEmitter &instFlags,
 /// @arg target     - The CodeGenTarget to use as a source of instructions
 static void populateInstInfo(CompoundConstantEmitter &infoArray,
                              CodeGenTarget &target) {
-  std::vector<const CodeGenInstruction*> numberedInstructions;
-  target.getInstructionsByEnumValue(numberedInstructions);
+  const std::vector<const CodeGenInstruction*> &numberedInstructions =
+    target.getInstructionsByEnumValue();
   
   unsigned int index;
   unsigned int numInstructions = numberedInstructions.size();
diff --git a/utils/TableGen/FastISelEmitter.cpp b/utils/TableGen/FastISelEmitter.cpp
index b94ded6..23f09a5 100644
--- a/utils/TableGen/FastISelEmitter.cpp
+++ b/utils/TableGen/FastISelEmitter.cpp
@@ -70,13 +70,16 @@ struct OperandsSignature {
     for (unsigned i = 0, e = InstPatNode->getNumChildren(); i != e; ++i) {
       TreePatternNode *Op = InstPatNode->getChild(i);
       // For now, filter out any operand with a predicate.
-      if (!Op->getPredicateFns().empty())
-        return false;
       // For now, filter out any operand with multiple values.
-      assert(Op->hasTypeSet() && "Type infererence not done?");
+      if (!Op->getPredicateFns().empty() ||
+          Op->getNumTypes() != 1)
+        return false;
+      
+      assert(Op->hasTypeSet(0) && "Type infererence not done?");
       // For now, all the operands must have the same type.
-      if (Op->getType() != VT)
+      if (Op->getType(0) != VT)
         return false;
+      
       if (!Op->isLeaf()) {
         if (Op->getOperator()->getName() == "imm") {
           Operands.push_back("i");
@@ -254,7 +257,7 @@ void FastISelMap::CollectPatterns(CodeGenDAGPatterns &CGP) {
     Record *Op = Dst->getOperator();
     if (!Op->isSubClassOf("Instruction"))
       continue;
-    CodeGenInstruction &II = CGP.getTargetInfo().getInstruction(Op->getName());
+    CodeGenInstruction &II = CGP.getTargetInfo().getInstruction(Op);
     if (II.OperandList.empty())
       continue;
 
@@ -295,10 +298,14 @@ void FastISelMap::CollectPatterns(CodeGenDAGPatterns &CGP) {
 
     Record *InstPatOp = InstPatNode->getOperator();
     std::string OpcodeName = getOpcodeName(InstPatOp, CGP);
-    MVT::SimpleValueType RetVT = InstPatNode->getType();
+    assert(InstPatNode->getNumTypes() <= 1);
+    MVT::SimpleValueType RetVT = MVT::isVoid;
+    if (InstPatNode->getNumTypes()) RetVT = InstPatNode->getType(0);
     MVT::SimpleValueType VT = RetVT;
-    if (InstPatNode->getNumChildren())
-      VT = InstPatNode->getChild(0)->getType();
+    if (InstPatNode->getNumChildren()) {
+      assert(InstPatNode->getChild(0)->getNumTypes() == 1);
+      VT = InstPatNode->getChild(0)->getType(0);
+    }
 
     // For now, filter out instructions which just set a register to
     // an Operand or an immediate, like MOV32ri.
diff --git a/utils/TableGen/InstrEnumEmitter.cpp b/utils/TableGen/InstrEnumEmitter.cpp
index d1e7f3d..47a8474 100644
--- a/utils/TableGen/InstrEnumEmitter.cpp
+++ b/utils/TableGen/InstrEnumEmitter.cpp
@@ -26,22 +26,15 @@ void InstrEnumEmitter::run(raw_ostream &OS) {
   CodeGenTarget Target;
 
   // We must emit the PHI opcode first...
-  std::string Namespace;
-  for (CodeGenTarget::inst_iterator II = Target.inst_begin(), 
-       E = Target.inst_end(); II != E; ++II) {
-    if (II->second.Namespace != "TargetOpcode") {
-      Namespace = II->second.Namespace;
-      break;
-    }
-  }
+  std::string Namespace = Target.getInstNamespace();
   
   if (Namespace.empty()) {
     fprintf(stderr, "No instructions defined!\n");
     exit(1);
   }
 
-  std::vector<const CodeGenInstruction*> NumberedInstructions;
-  Target.getInstructionsByEnumValue(NumberedInstructions);
+  const std::vector<const CodeGenInstruction*> &NumberedInstructions =
+    Target.getInstructionsByEnumValue();
 
   OS << "namespace " << Namespace << " {\n";
   OS << "  enum {\n";
diff --git a/utils/TableGen/InstrInfoEmitter.cpp b/utils/TableGen/InstrInfoEmitter.cpp
index 898c92a..8f7550b 100644
--- a/utils/TableGen/InstrInfoEmitter.cpp
+++ b/utils/TableGen/InstrInfoEmitter.cpp
@@ -39,16 +39,10 @@ static void PrintBarriers(std::vector<Record*> &Barriers,
 // Instruction Itinerary Information.
 //===----------------------------------------------------------------------===//
 
-struct RecordNameComparator {
-  bool operator()(const Record *Rec1, const Record *Rec2) const {
-    return Rec1->getName() < Rec2->getName();
-  }
-};
-
 void InstrInfoEmitter::GatherItinClasses() {
   std::vector<Record*> DefList =
   Records.getAllDerivedDefinitions("InstrItinClass");
-  std::sort(DefList.begin(), DefList.end(), RecordNameComparator());
+  std::sort(DefList.begin(), DefList.end(), LessRecord());
   
   for (unsigned i = 0, N = DefList.size(); i < N; i++)
     ItinClassMap[DefList[i]->getName()] = i;
@@ -149,7 +143,7 @@ void InstrInfoEmitter::EmitOperandInfo(raw_ostream &OS,
   const CodeGenTarget &Target = CDP.getTargetInfo();
   for (CodeGenTarget::inst_iterator II = Target.inst_begin(),
        E = Target.inst_end(); II != E; ++II) {
-    std::vector<std::string> OperandInfo = GetOperandInfo(II->second);
+    std::vector<std::string> OperandInfo = GetOperandInfo(**II);
     unsigned &N = OperandInfoIDs[OperandInfo];
     if (N != 0) continue;
     
@@ -214,7 +208,7 @@ void InstrInfoEmitter::run(raw_ostream &OS) {
   // Emit all of the instruction's implicit uses and defs.
   for (CodeGenTarget::inst_iterator II = Target.inst_begin(),
          E = Target.inst_end(); II != E; ++II) {
-    Record *Inst = II->second.TheDef;
+    Record *Inst = (*II)->TheDef;
     std::vector<Record*> Uses = Inst->getValueAsListOfDefs("Uses");
     if (!Uses.empty()) {
       unsigned &IL = EmittedLists[Uses];
@@ -244,8 +238,8 @@ void InstrInfoEmitter::run(raw_ostream &OS) {
   //
   OS << "\nstatic const TargetInstrDesc " << TargetName
      << "Insts[] = {\n";
-  std::vector<const CodeGenInstruction*> NumberedInstructions;
-  Target.getInstructionsByEnumValue(NumberedInstructions);
+  const std::vector<const CodeGenInstruction*> &NumberedInstructions =
+    Target.getInstructionsByEnumValue();
 
   for (unsigned i = 0, e = NumberedInstructions.size(); i != e; ++i)
     emitRecord(*NumberedInstructions[i], i, InstrInfo, EmittedLists,
diff --git a/utils/TableGen/Record.cpp b/utils/TableGen/Record.cpp
index f9e2fe8..b9facb4 100644
--- a/utils/TableGen/Record.cpp
+++ b/utils/TableGen/Record.cpp
@@ -646,18 +646,8 @@ Init *BinOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) {
     if (LHSs && RHSs) {
       DefInit *LOp = dynamic_cast<DefInit*>(LHSs->getOperator());
       DefInit *ROp = dynamic_cast<DefInit*>(RHSs->getOperator());
-      if (LOp->getDef() != ROp->getDef()) {
-        bool LIsOps =
-          LOp->getDef()->getName() == "outs" ||
-          LOp->getDef()->getName() != "ins" ||
-          LOp->getDef()->getName() != "defs";
-        bool RIsOps =
-          ROp->getDef()->getName() == "outs" ||
-          ROp->getDef()->getName() != "ins" ||
-          ROp->getDef()->getName() != "defs";
-        if (!LIsOps || !RIsOps)
-          throw "Concated Dag operators do not match!";
-      }
+      if (LOp == 0 || ROp == 0 || LOp->getDef() != ROp->getDef())
+        throw "Concated Dag operators do not match!";
       std::vector<Init*> Args;
       std::vector<std::string> ArgNames;
       for (unsigned i = 0, e = LHSs->getNumArgs(); i != e; ++i) {
diff --git a/utils/buildit/build_llvm b/utils/buildit/build_llvm
index 1fa3fdf..f67794e 100755
--- a/utils/buildit/build_llvm
+++ b/utils/buildit/build_llvm
@@ -106,11 +106,10 @@ if [ "x$RC_ProjectName" = "xllvmCore_EmbeddedHosted" ]; then
   # Try to use the platform llvm-gcc. Fall back to gcc if it's not available.
   for prog in gcc g++ ; do
     P=$DIR/bin/arm-apple-darwin$DARWIN_VERS-${prog}
-# FIXME: Uncomment once llvm-gcc works for this
-#    T=`xcrun -find llvm-${prog}`
-#    if [ "x$T" = "x" ] ; then
+    T=`xcrun -find llvm-${prog}`
+    if [ "x$T" = "x" ] ; then
       T=`xcrun -sdk $SDKROOT -find ${prog}`
-#    fi
+    fi
     echo '#!/bin/sh' > $P || exit 1
     echo 'exec '$T' -arch armv6 -isysroot '${SDKROOT}' "$@"' >> $P || exit 1
     chmod a+x $P || exit 1
diff --git a/utils/lit/lit/ExampleTests/Clang/lit.cfg b/utils/lit/lit/ExampleTests/Clang/lit.cfg
index 114ac60..1e1e807 100644
--- a/utils/lit/lit/ExampleTests/Clang/lit.cfg
+++ b/utils/lit/lit/ExampleTests/Clang/lit.cfg
@@ -41,40 +41,7 @@ def inferClang(PATH):
 
     return clang
 
-def inferClangCC(clang, PATH):
-    clangcc = os.getenv('CLANGCC')
-
-    # If the user set clang in the environment, definitely use that and don't
-    # try to validate.
-    if clangcc:
-        return clangcc
-
-    # Otherwise try adding -cc since we expect to be looking in a build
-    # directory.
-    if clang.endswith('.exe'):
-        clangccName = clang[:-4] + '-cc.exe'
-    else:
-        clangccName = clang + '-cc'
-    clangcc = lit.util.which(clangccName, PATH)
-    if not clangcc:
-        # Otherwise ask clang.
-        res = lit.util.capture([clang, '-print-prog-name=clang-cc'])
-        res = res.strip()
-        if res and os.path.exists(res):
-            clangcc = res
-
-    if not clangcc:
-        lit.fatal("couldn't find 'clang-cc' program, try setting "
-                  "CLANGCC in your environment")
-
-    return clangcc
-
 clang = inferClang(config.environment['PATH'])
 if not lit.quiet:
     lit.note('using clang: %r' % clang)
 config.substitutions.append( (' clang ', ' ' + clang + ' ') )
-
-clang_cc = inferClangCC(clang, config.environment['PATH'])
-if not lit.quiet:
-    lit.note('using clang-cc: %r' % clang_cc)
-config.substitutions.append( (' clang-cc ', ' ' + clang_cc + ' ') )
diff --git a/utils/lit/lit/LitConfig.py b/utils/lit/lit/LitConfig.py
index 0e0a493..9b62470 100644
--- a/utils/lit/lit/LitConfig.py
+++ b/utils/lit/lit/LitConfig.py
@@ -15,7 +15,7 @@ class LitConfig:
     import Util as util
 
     def __init__(self, progname, path, quiet,
-                 useValgrind, valgrindArgs,
+                 useValgrind, valgrindLeakCheck, valgrindArgs,
                  useTclAsSh,
                  noExecute, debug, isWindows,
                  params):
@@ -25,7 +25,8 @@ class LitConfig:
         self.path = list(map(str, path))
         self.quiet = bool(quiet)
         self.useValgrind = bool(useValgrind)
-        self.valgrindArgs = list(valgrindArgs)
+        self.valgrindLeakCheck = bool(valgrindLeakCheck)
+        self.valgrindUserArgs = list(valgrindArgs)
         self.useTclAsSh = bool(useTclAsSh)
         self.noExecute = noExecute
         self.debug = debug
@@ -36,6 +37,22 @@ class LitConfig:
         self.numErrors = 0
         self.numWarnings = 0
 
+        self.valgrindArgs = []
+        self.valgrindTriple = ""
+        if self.useValgrind:
+            self.valgrindTriple = "-vg"
+            self.valgrindArgs = ['valgrind', '-q', '--run-libc-freeres=no',
+                                 '--tool=memcheck', '--trace-children=yes',
+                                 '--error-exitcode=123']
+            if self.valgrindLeakCheck:
+                self.valgrindTriple += "_leak"
+                self.valgrindArgs.append('--leak-check=full')
+            else:
+                # The default is 'summary'.
+                self.valgrindArgs.append('--leak-check=no')
+            self.valgrindArgs.extend(self.valgrindUserArgs)
+
+
     def load_config(self, config, path):
         """load_config(config, path) - Load a config object from an alternate
         path."""
diff --git a/utils/lit/lit/LitFormats.py b/utils/lit/lit/LitFormats.py
index 270f087..e86f103 100644
--- a/utils/lit/lit/LitFormats.py
+++ b/utils/lit/lit/LitFormats.py
@@ -1,3 +1,2 @@
 from TestFormats import GoogleTest, ShTest, TclTest
 from TestFormats import SyntaxCheckTest, OneCommandPerFileTest
-
diff --git a/utils/lit/lit/TestFormats.py b/utils/lit/lit/TestFormats.py
index 6ab3f9c..433e39a 100644
--- a/utils/lit/lit/TestFormats.py
+++ b/utils/lit/lit/TestFormats.py
@@ -73,12 +73,7 @@ class GoogleTest(object):
 
         cmd = [testPath, '--gtest_filter=' + testName]
         if litConfig.useValgrind:
-            valgrindArgs = ['valgrind', '-q',
-                            '--tool=memcheck', '--trace-children=yes',
-                            '--error-exitcode=123']
-            valgrindArgs.extend(litConfig.valgrindArgs)
-
-            cmd = valgrindArgs + cmd
+            cmd = litConfig.valgrindArgs + cmd
 
         out, err, exitCode = TestRunner.executeCommand(
             cmd, env=test.config.environment)
diff --git a/utils/lit/lit/TestRunner.py b/utils/lit/lit/TestRunner.py
index 20fbc6c..29adff2 100644
--- a/utils/lit/lit/TestRunner.py
+++ b/utils/lit/lit/TestRunner.py
@@ -252,6 +252,14 @@ def executeTclScriptInternal(test, litConfig, tmpBase, commands, cwd):
         except:
             return (Test.FAIL, "Tcl 'exec' parse error on: %r" % ln)
 
+    if litConfig.useValgrind:
+        for pipeline in cmds:
+            if pipeline.commands:
+                # Only valgrind the first command in each pipeline, to avoid
+                # valgrinding things like grep, not, and FileCheck.
+                cmd = pipeline.commands[0]
+                cmd.args = litConfig.valgrindArgs + cmd.args
+
     cmd = cmds[0]
     for c in cmds[1:]:
         cmd = ShUtil.Seq(cmd, '&&', c)
@@ -327,12 +335,7 @@ def executeScript(test, litConfig, tmpBase, commands, cwd):
         if litConfig.useValgrind:
             # FIXME: Running valgrind on sh is overkill. We probably could just
             # run on clang with no real loss.
-            valgrindArgs = ['valgrind', '-q',
-                            '--tool=memcheck', '--trace-children=yes',
-                            '--error-exitcode=123']
-            valgrindArgs.extend(litConfig.valgrindArgs)
-
-            command = valgrindArgs + command
+            command = litConfig.valgrindArgs + command
 
     return executeCommand(command, cwd=cwd, env=test.config.environment)
 
diff --git a/utils/lit/lit/lit.py b/utils/lit/lit/lit.py
index f1f19c4..e800754 100755
--- a/utils/lit/lit/lit.py
+++ b/utils/lit/lit/lit.py
@@ -362,6 +362,9 @@ def main():
     group.add_option("", "--vg", dest="useValgrind",
                      help="Run tests under valgrind",
                      action="store_true", default=False)
+    group.add_option("", "--vg-leak", dest="valgrindLeakCheck",
+                     help="Check for memory leaks under valgrind",
+                     action="store_true", default=False)
     group.add_option("", "--vg-arg", dest="valgrindArgs", metavar="ARG",
                      help="Specify an extra argument for valgrind",
                      type=str, action="append", default=[])
@@ -411,7 +414,14 @@ def main():
         gSiteConfigName = '%s.site.cfg' % opts.configPrefix
 
     if opts.numThreads is None:
-        opts.numThreads = Util.detectCPUs()
+# Python <2.5 has a race condition causing lit to always fail with numThreads>1
+# http://bugs.python.org/issue1731717
+# I haven't seen this bug occur with 2.5.2 and later, so only enable multiple
+# threads by default there.
+       if sys.hexversion >= 0x2050200:
+               opts.numThreads = Util.detectCPUs()
+       else:
+               opts.numThreads = 1
 
     inputs = args
 
@@ -429,6 +439,7 @@ def main():
                                     path = opts.path,
                                     quiet = opts.quiet,
                                     useValgrind = opts.useValgrind,
+                                    valgrindLeakCheck = opts.valgrindLeakCheck,
                                     valgrindArgs = opts.valgrindArgs,
                                     useTclAsSh = opts.useTclAsSh,
                                     noExecute = opts.noExecute,