616 files changed, 19401 insertions, 7599 deletions
diff --git a/Makefile.config.in b/Makefile.config.in
index a3384e7..ec11bb3 100644
--- a/Makefile.config.in
+++ b/Makefile.config.in
@@ -156,6 +156,7 @@ TAR        := @TAR@
 # Paths to miscellaneous programs we hope are present but might not be
 PERL       := @PERL@
 BZIP2      := @BZIP2@
+CAT        := @CAT@
 DOT        := @DOT@
 DOXYGEN    := @DOXYGEN@
 GROFF      := @GROFF@
@@ -167,6 +168,7 @@ OCAMLDOC   := @OCAMLDOC@
 GAS        := @GAS@
 POD2HTML   := @POD2HTML@
 POD2MAN    := @POD2MAN@
+PDFROFF    := @PDFROFF@
 RUNTEST    := @RUNTEST@
 TCLSH      := @TCLSH@
 ZIP        := @ZIP@
@@ -268,6 +270,9 @@ ENABLE_SHARED := @ENABLE_SHARED@
 # Use -fvisibility-inlines-hidden?
 ENABLE_VISIBILITY_INLINES_HIDDEN := @ENABLE_VISIBILITY_INLINES_HIDDEN@
 
+# Do we want to allow timestamping information into builds?
+ENABLE_TIMESTAMPS := @ENABLE_TIMESTAMPS@
+
 # This option tells the Makefiles to produce verbose output.
 # It essentially prints the commands that make is executing
 #VERBOSE = 1
diff --git a/Makefile.rules b/Makefile.rules
index d70215e..4085881 100644
--- a/Makefile.rules
+++ b/Makefile.rules
@@ -447,6 +447,14 @@ else
   endif
 endif
 
+# Support makefile variable to disable any kind of timestamp/non-deterministic
+# info from being used in the build.
+ifeq ($(ENABLE_TIMESTAMPS),1)
+  DOTDIR_TIMESTAMP_COMMAND := $(DATE)
+else
+  DOTDIR_TIMESTAMP_COMMAND := echo 'Created.'
+endif
+
 ifeq ($(HOST_OS),MingW)
   # Work around PR4957
   CPP.Defines += -D__NO_CTYPE_INLINE
@@ -581,10 +589,14 @@ ifeq ($(TARGET_OS),Darwin)
 endif
 
 ifdef SHARED_LIBRARY
+ifneq ($(HOST_OS),Darwin)
+  LD.Flags += $(RPATH) -Wl,'$$ORIGIN'
+else
 ifneq ($(DARWIN_MAJVERS),4)
   LD.Flags += $(RPATH) -Wl,$(LibDir)
 endif
 endif
+endif
 
 ifdef TOOL_VERBOSE
   C.Flags += -v
@@ -779,7 +791,7 @@ $(DESTDIR)$(PROJ_bindir) $(DESTDIR)$(PROJ_libdir) $(DESTDIR)$(PROJ_includedir) $
 # To create other directories, as needed, and timestamp their creation
 %/.dir:
 	$(Verb) $(MKDIR) $* > /dev/null
-	$(Verb) $(DATE) > $@
+	$(Verb) $(DOTDIR_TIMESTAMP_COMMAND) > $@
 
 .PRECIOUS: $(ObjDir)/.dir $(LibDir)/.dir $(ToolDir)/.dir $(ExmplDir)/.dir
 .PRECIOUS: $(LLVMLibDir)/.dir $(LLVMToolDir)/.dir $(LLVMExmplDir)/.dir
@@ -1116,7 +1128,12 @@ install-local::
 uninstall-local::
 	$(Echo) Uninstall circumvented with NO_INSTALL
 else
+
+ifdef LOADABLE_MODULE
+DestSharedLib = $(DESTDIR)$(PROJ_libdir)/$(LIBRARYNAME)$(SHLIBEXT)
+else
 DestSharedLib = $(DESTDIR)$(PROJ_libdir)/lib$(LIBRARYNAME)$(SHLIBEXT)
+endif
 
 install-local:: $(DestSharedLib)
 
diff --git a/autoconf/configure.ac b/autoconf/configure.ac
index 3b40196..8487d94 100644
--- a/autoconf/configure.ac
+++ b/autoconf/configure.ac
@@ -525,6 +525,20 @@ case "$enableval" in
   *) AC_MSG_ERROR([Invalid setting for --enable-shared. Use "yes" or "no"]) ;;
 esac
 
+dnl Enable embedding timestamp information into build.
+AC_ARG_ENABLE(timestamps,
+  AS_HELP_STRING([--enable-timestamps],
+                 [Enable embedding timestamp information in build (default is YES)]),,
+                 enableval=default)
+case "$enableval" in
+  yes) AC_SUBST(ENABLE_TIMESTAMPS,[1]) ;;
+  no)  AC_SUBST(ENABLE_TIMESTAMPS,[0]) ;;
+  default) AC_SUBST(ENABLE_TIMESTAMPS,[1]) ;;
+  *) AC_MSG_ERROR([Invalid setting for --enable-timestamps. Use "yes" or "no"]) ;;
+esac
+AC_DEFINE_UNQUOTED([ENABLE_TIMESTAMPS],$ENABLE_TIMESTAMPS,
+                   [Define if timestamp information (e.g., __DATE___) is allowed])
+
 dnl Allow specific targets to be specified for building (or not)
 TARGETS_TO_BUILD=""
 AC_ARG_ENABLE([targets],AS_HELP_STRING([--enable-targets],
@@ -1003,11 +1017,13 @@ dnl are not found then they are set to "true" which always succeeds but does
 dnl nothing. This just lets the build output show that we could have done
 dnl something if the tool was available.
 AC_PATH_PROG(BZIP2, [bzip2])
+AC_PATH_PROG(CAT, [cat])
 AC_PATH_PROG(DOXYGEN, [doxygen])
 AC_PATH_PROG(GROFF, [groff])
 AC_PATH_PROG(GZIP, [gzip])
 AC_PATH_PROG(POD2HTML, [pod2html])
 AC_PATH_PROG(POD2MAN, [pod2man])
+AC_PATH_PROG(PDFROFF, [pdfroff])
 AC_PATH_PROG(RUNTEST, [runtest])
 DJ_AC_PATH_TCLSH
 AC_PATH_PROG(ZIP, [zip])
@@ -1543,9 +1559,6 @@ AC_CONFIG_FILES([Makefile.config])
 dnl Configure the RPM spec file for LLVM
 AC_CONFIG_FILES([llvm.spec])
 
-dnl Configure doxygen's configuration file
-AC_CONFIG_FILES([docs/doxygen.cfg])
-
 dnl Configure llvmc's Base plugin
 AC_CONFIG_FILES([tools/llvmc/plugins/Base/Base.td])
 
diff --git a/configure b/configure
index 3e5ad9c..755746f 100755
--- a/configure
+++ b/configure
@@ -690,6 +690,7 @@ ENABLE_DOXYGEN
 ENABLE_THREADS
 ENABLE_PIC
 ENABLE_SHARED
+ENABLE_TIMESTAMPS
 TARGETS_TO_BUILD
 LLVM_ENUM_TARGETS
 LLVM_ENUM_ASM_PRINTERS
@@ -736,11 +737,13 @@ INSTALL_PROGRAM
 INSTALL_SCRIPT
 INSTALL_DATA
 BZIP2
+CAT
 DOXYGEN
 GROFF
 GZIP
 POD2HTML
 POD2MAN
+PDFROFF
 RUNTEST
 TCLSH
 ZIP
@@ -1408,6 +1411,8 @@ Optional Features:
                           is YES)
   --enable-shared         Build a shared library and link tools against it
                           (default is NO)
+  --enable-timestamps     Enable embedding timestamp information in build
+                          (default is YES)
   --enable-targets        Build specific host targets: all or
                           target1,target2,... Valid targets are: host, x86,
                           x86_64, sparc, powerpc, alpha, arm, mips, spu,
@@ -4921,6 +4926,30 @@ echo "$as_me: error: Invalid setting for --enable-shared. Use \"yes\" or \"no\""
    { (exit 1); exit 1; }; } ;;
 esac
 
+# Check whether --enable-timestamps was given.
+if test "${enable_timestamps+set}" = set; then
+  enableval=$enable_timestamps;
+else
+  enableval=default
+fi
+
+case "$enableval" in
+  yes) ENABLE_TIMESTAMPS=1
+ ;;
+  no)  ENABLE_TIMESTAMPS=0
+ ;;
+  default) ENABLE_TIMESTAMPS=1
+ ;;
+  *) { { echo "$as_me:$LINENO: error: Invalid setting for --enable-timestamps. Use \"yes\" or \"no\"" >&5
+echo "$as_me: error: Invalid setting for --enable-timestamps. Use \"yes\" or \"no\"" >&2;}
+   { (exit 1); exit 1; }; } ;;
+esac
+
+cat >>confdefs.h <<_ACEOF
+#define ENABLE_TIMESTAMPS $ENABLE_TIMESTAMPS
+_ACEOF
+
+
 TARGETS_TO_BUILD=""
 # Check whether --enable-targets was given.
 if test "${enable_targets+set}" = set; then
@@ -8016,6 +8045,46 @@ echo "${ECHO_T}no" >&6; }
 fi
 
 
+# Extract the first word of "cat", so it can be a program name with args.
+set dummy cat; ac_word=$2
+{ echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; }
+if test "${ac_cv_path_CAT+set}" = set; then
+  echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+  case $CAT in
+  [\\/]* | ?:[\\/]*)
+  ac_cv_path_CAT="$CAT" # Let the user override the test with a path.
+  ;;
+  *)
+  as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+  for ac_exec_ext in '' $ac_executable_extensions; do
+  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
+    ac_cv_path_CAT="$as_dir/$ac_word$ac_exec_ext"
+    echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+done
+IFS=$as_save_IFS
+
+  ;;
+esac
+fi
+CAT=$ac_cv_path_CAT
+if test -n "$CAT"; then
+  { echo "$as_me:$LINENO: result: $CAT" >&5
+echo "${ECHO_T}$CAT" >&6; }
+else
+  { echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6; }
+fi
+
+
 # Extract the first word of "doxygen", so it can be a program name with args.
 set dummy doxygen; ac_word=$2
 { echo "$as_me:$LINENO: checking for $ac_word" >&5
@@ -8216,6 +8285,46 @@ echo "${ECHO_T}no" >&6; }
 fi
 
 
+# Extract the first word of "pdfroff", so it can be a program name with args.
+set dummy pdfroff; ac_word=$2
+{ echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; }
+if test "${ac_cv_path_PDFROFF+set}" = set; then
+  echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+  case $PDFROFF in
+  [\\/]* | ?:[\\/]*)
+  ac_cv_path_PDFROFF="$PDFROFF" # Let the user override the test with a path.
+  ;;
+  *)
+  as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+  for ac_exec_ext in '' $ac_executable_extensions; do
+  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
+    ac_cv_path_PDFROFF="$as_dir/$ac_word$ac_exec_ext"
+    echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+done
+IFS=$as_save_IFS
+
+  ;;
+esac
+fi
+PDFROFF=$ac_cv_path_PDFROFF
+if test -n "$PDFROFF"; then
+  { echo "$as_me:$LINENO: result: $PDFROFF" >&5
+echo "${ECHO_T}$PDFROFF" >&6; }
+else
+  { echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6; }
+fi
+
+
 # Extract the first word of "runtest", so it can be a program name with args.
 set dummy runtest; ac_word=$2
 { echo "$as_me:$LINENO: checking for $ac_word" >&5
@@ -11275,7 +11384,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<EOF
-#line 11278 "configure"
+#line 11387 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
@@ -20300,9 +20409,6 @@ ac_config_files="$ac_config_files Makefile.config"
 ac_config_files="$ac_config_files llvm.spec"
 
 
-ac_config_files="$ac_config_files docs/doxygen.cfg"
-
-
 ac_config_files="$ac_config_files tools/llvmc/plugins/Base/Base.td"
 
 
@@ -20922,7 +21028,6 @@ do
     "include/llvm/System/DataTypes.h") CONFIG_HEADERS="$CONFIG_HEADERS include/llvm/System/DataTypes.h" ;;
     "Makefile.config") CONFIG_FILES="$CONFIG_FILES Makefile.config" ;;
     "llvm.spec") CONFIG_FILES="$CONFIG_FILES llvm.spec" ;;
-    "docs/doxygen.cfg") CONFIG_FILES="$CONFIG_FILES docs/doxygen.cfg" ;;
     "tools/llvmc/plugins/Base/Base.td") CONFIG_FILES="$CONFIG_FILES tools/llvmc/plugins/Base/Base.td" ;;
     "tools/llvm-config/llvm-config.in") CONFIG_FILES="$CONFIG_FILES tools/llvm-config/llvm-config.in" ;;
     "setup") CONFIG_COMMANDS="$CONFIG_COMMANDS setup" ;;
@@ -21089,6 +21194,7 @@ ENABLE_DOXYGEN!$ENABLE_DOXYGEN$ac_delim
 ENABLE_THREADS!$ENABLE_THREADS$ac_delim
 ENABLE_PIC!$ENABLE_PIC$ac_delim
 ENABLE_SHARED!$ENABLE_SHARED$ac_delim
+ENABLE_TIMESTAMPS!$ENABLE_TIMESTAMPS$ac_delim
 TARGETS_TO_BUILD!$TARGETS_TO_BUILD$ac_delim
 LLVM_ENUM_TARGETS!$LLVM_ENUM_TARGETS$ac_delim
 LLVM_ENUM_ASM_PRINTERS!$LLVM_ENUM_ASM_PRINTERS$ac_delim
@@ -21097,7 +21203,6 @@ LLVM_ENUM_DISASSEMBLERS!$LLVM_ENUM_DISASSEMBLERS$ac_delim
 ENABLE_CBE_PRINTF_A!$ENABLE_CBE_PRINTF_A$ac_delim
 CLANGPATH!$CLANGPATH$ac_delim
 CLANGXXPATH!$CLANGXXPATH$ac_delim
-ENABLE_BUILT_CLANG!$ENABLE_BUILT_CLANG$ac_delim
 _ACEOF
 
   if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 97; then
@@ -21139,6 +21244,7 @@ _ACEOF
 ac_delim='%!_!# '
 for ac_last_try in false false false false false :; do
   cat >conf$$subs.sed <<_ACEOF
+ENABLE_BUILT_CLANG!$ENABLE_BUILT_CLANG$ac_delim
 OPTIMIZE_OPTION!$OPTIMIZE_OPTION$ac_delim
 EXTRA_OPTIONS!$EXTRA_OPTIONS$ac_delim
 BINUTILS_INCDIR!$BINUTILS_INCDIR$ac_delim
@@ -21176,11 +21282,13 @@ INSTALL_PROGRAM!$INSTALL_PROGRAM$ac_delim
 INSTALL_SCRIPT!$INSTALL_SCRIPT$ac_delim
 INSTALL_DATA!$INSTALL_DATA$ac_delim
 BZIP2!$BZIP2$ac_delim
+CAT!$CAT$ac_delim
 DOXYGEN!$DOXYGEN$ac_delim
 GROFF!$GROFF$ac_delim
 GZIP!$GZIP$ac_delim
 POD2HTML!$POD2HTML$ac_delim
 POD2MAN!$POD2MAN$ac_delim
+PDFROFF!$PDFROFF$ac_delim
 RUNTEST!$RUNTEST$ac_delim
 TCLSH!$TCLSH$ac_delim
 ZIP!$ZIP$ac_delim
@@ -21233,7 +21341,7 @@ LIBOBJS!$LIBOBJS$ac_delim
 LTLIBOBJS!$LTLIBOBJS$ac_delim
 _ACEOF
 
-  if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 92; then
+  if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 95; then
     break
   elif $ac_last_try; then
     { { echo "$as_me:$LINENO: error: could not make $CONFIG_STATUS" >&5
diff --git a/docs/AliasAnalysis.html b/docs/AliasAnalysis.html
index ef0f414..0413622 100644
--- a/docs/AliasAnalysis.html
+++ b/docs/AliasAnalysis.html
@@ -930,7 +930,7 @@ analysis directly.</p>
 
   <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
   <a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-03-01 20:24:17 +0100 (Mon, 01 Mar 2010) $
+  Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $
 </address>
 
 </body>
diff --git a/docs/BitCodeFormat.html b/docs/BitCodeFormat.html
index 7413752..f1f175d 100644
--- a/docs/BitCodeFormat.html
+++ b/docs/BitCodeFormat.html
@@ -40,8 +40,9 @@
   </li>
 </ol>
 <div class="doc_author">
-  <p>Written by <a href="mailto:sabre@nondot.org">Chris Lattner</a>
-  and <a href="http://www.reverberate.org">Joshua Haberman</a>.
+  <p>Written by <a href="mailto:sabre@nondot.org">Chris Lattner</a>,
+  <a href="http://www.reverberate.org">Joshua Haberman</a>,
+  and <a href="mailto:housel@acm.org">Peter S. Housel</a>.
 </p>
 </div>
 
@@ -1037,8 +1038,17 @@ fields of <tt>FUNCTION</tt> records.</p>
 
 <div class="doc_text">
 
-<p>The <tt>PARAMATTR_BLOCK</tt> block (id 9) ...
-</p>
+<p>The <tt>PARAMATTR_BLOCK</tt> block (id 9) contains a table of
+entries describing the attributes of function parameters. These
+entries are referenced by 1-based index in the <i>paramattr</i> field
+of module block <a name="MODULE_CODE_FUNCTION"><tt>FUNCTION</tt></a>
+records, or within the <i>attr</i> field of function block <a
+href="#FUNC_CODE_INST_INVOKE"><tt>INST_INVOKE</tt></a> and <a
+href="#FUNC_CODE_INST_CALL"><tt>INST_CALL</tt></a> records.</p>
+
+<p>Entries within <tt>PARAMATTR_BLOCK</tt> are constructed to ensure
+that each is unique (i.e., no two indicies represent equivalent
+attribute lists). </p>
 
 </div>
 
@@ -1051,8 +1061,41 @@ fields of <tt>FUNCTION</tt> records.</p>
 
 <p><tt>[ENTRY, paramidx0, attr0, paramidx1, attr1...]</tt></p>
 
-<p>The <tt>ENTRY</tt> record (code 1) ...
+<p>The <tt>ENTRY</tt> record (code 1) contains an even number of
+values describing a unique set of function parameter attributes. Each
+<i>paramidx</i> value indicates which set of attributes is
+represented, with 0 representing the return value attributes,
+0xFFFFFFFF representing function attributes, and other values
+representing 1-based function parameters. Each <i>attr</i> value is a
+bitmap with the following interpretation:
 </p>
+
+<ul>
+<li>bit 0: <tt>zeroext</tt></li>
+<li>bit 1: <tt>signext</tt></li>
+<li>bit 2: <tt>noreturn</tt></li>
+<li>bit 3: <tt>inreg</tt></li>
+<li>bit 4: <tt>sret</tt></li>
+<li>bit 5: <tt>nounwind</tt></li>
+<li>bit 6: <tt>noalias</tt></li>
+<li>bit 7: <tt>byval</tt></li>
+<li>bit 8: <tt>nest</tt></li>
+<li>bit 9: <tt>readnone</tt></li>
+<li>bit 10: <tt>readonly</tt></li>
+<li>bit 11: <tt>noinline</tt></li>
+<li>bit 12: <tt>alwaysinline</tt></li>
+<li>bit 13: <tt>optsize</tt></li>
+<li>bit 14: <tt>ssp</tt></li>
+<li>bit 15: <tt>sspreq</tt></li>
+<li>bits 16&ndash;31: <tt>align <var>n</var></tt></li>
+<li>bit 32: <tt>nocapture</tt></li>
+<li>bit 33: <tt>noredzone</tt></li>
+<li>bit 34: <tt>noimplicitfloat</tt></li>
+<li>bit 35: <tt>naked</tt></li>
+<li>bit 36: <tt>inlinehint</tt></li>
+<li>bits 37&ndash;39: <tt>alignstack <var>n</var></tt>, represented as
+the logarithm base 2 of the requested alignment, plus 1</li>
+</ul>
 </div>
 
 <!-- ======================================================================= -->
@@ -1061,11 +1104,283 @@ fields of <tt>FUNCTION</tt> records.</p>
 
 <div class="doc_text">
 
-<p>The <tt>TYPE_BLOCK</tt> block (id 10) ...
+<p>The <tt>TYPE_BLOCK</tt> block (id 10) contains records which
+constitute a table of type operator entries used to represent types
+referenced within an LLVM module. Each record (with the exception of
+<a href="#TYPE_CODE_NUMENTRY"><tt>NUMENTRY</tt></a>) generates a
+single type table entry, which may be referenced by 0-based index from
+instructions, constants, metadata, type symbol table entries, or other
+type operator records.
+</p>
+
+<p>Entries within <tt>TYPE_BLOCK</tt> are constructed to ensure that
+each entry is unique (i.e., no two indicies represent structurally
+equivalent types). </p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection"><a name="TYPE_CODE_NUMENTRY">TYPE_CODE_NUMENTRY Record</a>
+</div>
+
+<div class="doc_text">
+
+<p><tt>[NUMENTRY, numentries]</tt></p>
+
+<p>The <tt>NUMENTRY</tt> record (code 1) contains a single value which
+indicates the total number of type code entries in the type table of
+the module. If present, <tt>NUMENTRY</tt> should be the first record
+in the block.
+</p>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection"><a name="TYPE_CODE_VOID">TYPE_CODE_VOID Record</a>
+</div>
+
+<div class="doc_text">
+
+<p><tt>[VOID]</tt></p>
+
+<p>The <tt>VOID</tt> record (code 2) adds a <tt>void</tt> type to the
+type table.
+</p>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection"><a name="TYPE_CODE_FLOAT">TYPE_CODE_FLOAT Record</a>
+</div>
+
+<div class="doc_text">
+
+<p><tt>[FLOAT]</tt></p>
+
+<p>The <tt>FLOAT</tt> record (code 3) adds a <tt>float</tt> (32-bit
+floating point) type to the type table.
+</p>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection"><a name="TYPE_CODE_DOUBLE">TYPE_CODE_DOUBLE Record</a>
+</div>
+
+<div class="doc_text">
+
+<p><tt>[DOUBLE]</tt></p>
+
+<p>The <tt>DOUBLE</tt> record (code 4) adds a <tt>double</tt> (64-bit
+floating point) type to the type table.
+</p>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection"><a name="TYPE_CODE_LABEL">TYPE_CODE_LABEL Record</a>
+</div>
+
+<div class="doc_text">
+
+<p><tt>[LABEL]</tt></p>
+
+<p>The <tt>LABEL</tt> record (code 5) adds a <tt>label</tt> type to
+the type table.
+</p>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection"><a name="TYPE_CODE_OPAQUE">TYPE_CODE_OPAQUE Record</a>
+</div>
+
+<div class="doc_text">
+
+<p><tt>[OPAQUE]</tt></p>
+
+<p>The <tt>OPAQUE</tt> record (code 6) adds an <tt>opaque</tt> type to
+the type table. Note that distinct <tt>opaque</tt> types are not
+unified.
+</p>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection"><a name="TYPE_CODE_INTEGER">TYPE_CODE_INTEGER  Record</a>
+</div>
+
+<div class="doc_text">
+
+<p><tt>[INTEGER, width]</tt></p>
+
+<p>The <tt>INTEGER</tt> record (code 7) adds an integer type to the
+type table. The single <i>width</i> field indicates the width of the
+integer type.
+</p>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection"><a name="TYPE_CODE_POINTER">TYPE_CODE_POINTER Record</a>
+</div>
+
+<div class="doc_text">
+
+<p><tt>[POINTER, pointee type, address space]</tt></p>
+
+<p>The <tt>POINTER</tt> record (code 8) adds a pointer type to the
+type table. The operand fields are</p>
+
+<ul>
+<li><i>pointee type</i>: The type index of the pointed-to type</li>
+
+<li><i>address space</i>: If supplied, the target-specific numbered
+address space where the pointed-to object resides. Otherwise, the
+default address space is zero.
+</li>
+</ul>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection"><a name="TYPE_CODE_FUNCTION">TYPE_CODE_FUNCTION Record</a>
+</div>
+
+<div class="doc_text">
+
+<p><tt>[FUNCTION, vararg, ignored, retty, ...paramty... ]</tt></p>
+
+<p>The <tt>FUNCTION</tt> record (code 9) adds a function type to the
+type table. The operand fields are</p>
+
+<ul>
+<li><i>vararg</i>: Non-zero if the type represents a varargs function</li>
+
+<li><i>ignored</i>: This value field is present for backward
+compatibility only, and is ignored</li>
+
+<li><i>retty</i>: The type index of the function's return type</li>
+
+<li><i>paramty</i>: Zero or more type indices representing the
+parameter types of the function</li>
+</ul>
+	
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection"><a name="TYPE_CODE_STRUCT">TYPE_CODE_STRUCT Record</a>
+</div>
+
+<div class="doc_text">
+
+<p><tt>[STRUCT, ispacked, ...eltty...]</tt></p>
+
+<p>The <tt>STRUCT </tt> record (code 10) adds a struct type to the
+type table. The operand fields are</p>
+
+<ul>
+<li><i>ispacked</i>: Non-zero if the type represents a packed structure</li>
+
+<li><i>eltty</i>: Zero or more type indices representing the element
+types of the structure</li>
+</ul>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection"><a name="TYPE_CODE_ARRAY">TYPE_CODE_ARRAY Record</a>
+</div>
+
+<div class="doc_text">
+
+<p><tt>[ARRAY, numelts, eltty]</tt></p>
+
+<p>The <tt>ARRAY</tt> record (code 11) adds an array type to the type
+table.  The operand fields are</p>
+
+<ul>
+<li><i>numelts</i>: The number of elements in arrays of this type</li>
+
+<li><i>eltty</i>: The type index of the array element type</li>
+</ul>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection"><a name="TYPE_CODE_VECTOR">TYPE_CODE_VECTOR Record</a>
+</div>
+
+<div class="doc_text">
+
+<p><tt>[VECTOR, numelts, eltty]</tt></p>
+
+<p>The <tt>VECTOR</tt> record (code 12) adds a vector type to the type
+table.  The operand fields are</p>
+
+<ul>
+<li><i>numelts</i>: The number of elements in vectors of this type</li>
+
+<li><i>eltty</i>: The type index of the vector element type</li>
+</ul>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection"><a name="TYPE_CODE_X86_FP80">TYPE_CODE_X86_FP80 Record</a>
+</div>
+
+<div class="doc_text">
+
+<p><tt>[X86_FP80]</tt></p>
+
+<p>The <tt>X86_FP80</tt> record (code 13) adds an <tt>x86_fp80</tt> (80-bit
+floating point) type to the type table.
+</p>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection"><a name="TYPE_CODE_FP128">TYPE_CODE_FP128 Record</a>
+</div>
+
+<div class="doc_text">
+
+<p><tt>[FP128]</tt></p>
+
+<p>The <tt>FP128</tt> record (code 14) adds an <tt>fp128</tt> (128-bit
+floating point) type to the type table.
+</p>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection"><a name="TYPE_CODE_PPC_FP128">TYPE_CODE_PPC_FP128 Record</a>
+</div>
+
+<div class="doc_text">
+
+<p><tt>[PPC_FP128]</tt></p>
+
+<p>The <tt>PPC_FP128</tt> record (code 15) adds a <tt>ppc_fp128</tt>
+(128-bit floating point) type to the type table.
+</p>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection"><a name="TYPE_CODE_METADATA">TYPE_CODE_METADATA Record</a>
+</div>
+
+<div class="doc_text">
+
+<p><tt>[METADATA]</tt></p>
+
+<p>The <tt>METADATA</tt> record (code 16) adds a <tt>metadata</tt>
+type to the type table.
 </p>
+</div>
 
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection"><a name="TYPE_CODE_UNION">TYPE_CODE_UNION Record</a>
 </div>
 
+<div class="doc_text">
+
+<p><tt>[UNION, ...eltty...]</tt></p>
+
+<p>The <tt>UNION</tt> record (code 17) adds a <tt>union</tt> type to
+the type table. The <i>eltty</i> operand fields are zero or more type
+indices representing the element types of the union.
+</p>
+
+</div>
 
 <!-- ======================================================================= -->
 <div class="doc_subsection"><a name="CONSTANTS_BLOCK">CONSTANTS_BLOCK Contents</a>
@@ -1107,11 +1422,28 @@ fields of <tt>FUNCTION</tt> records.</p>
 
 <div class="doc_text">
 
-<p>The <tt>TYPE_SYMTAB_BLOCK</tt> block (id 13) ...
+<p>The <tt>TYPE_SYMTAB_BLOCK</tt> block (id 13) contains entries which
+map between module-level named types and their corresponding type
+indices.
 </p>
 
 </div>
 
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection"><a name="TST_CODE_ENTRY">TST_CODE_ENTRY Record</a>
+</div>
+
+<div class="doc_text">
+
+<p><tt>[ENTRY, typeid, ...string...]</tt></p>
+
+<p>The <tt>ENTRY</tt> record (code 1) contains a variable number of
+values, with the first giving the type index of the designated type,
+and the remaining values giving the character codes of the type
+name. Each entry corresponds to a single named type.
+</p>
+</div>
+
 
 <!-- ======================================================================= -->
 <div class="doc_subsection"><a name="VALUE_SYMTAB_BLOCK">VALUE_SYMTAB_BLOCK Contents</a>
@@ -1157,7 +1489,7 @@ fields of <tt>FUNCTION</tt> records.</p>
  src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
  <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
 <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
-Last modified: $Date: 2010-01-20 18:53:51 +0100 (Wed, 20 Jan 2010) $
+Last modified: $Date: 2010-05-22 00:20:54 +0200 (Sat, 22 May 2010) $
 </address>
 </body>
 </html>
diff --git a/docs/Bugpoint.html b/docs/Bugpoint.html
index c89b4c5..cbd71aa 100644
--- a/docs/Bugpoint.html
+++ b/docs/Bugpoint.html
@@ -243,7 +243,7 @@ non-obvious ways.  Here are some hints and tips:<p>
 
   <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
   <a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2009-10-12 20:12:47 +0200 (Mon, 12 Oct 2009) $
+  Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $
 </address>
 
 </body>
diff --git a/docs/CodeGenerator.html b/docs/CodeGenerator.html
index b2c3b83..ec050bf 100644
--- a/docs/CodeGenerator.html
+++ b/docs/CodeGenerator.html
@@ -2162,7 +2162,7 @@ MOVSX32rm16 -&gt; movsx, 32-bit register, 16-bit memory
 
   <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
   <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-04-09 20:39:54 +0200 (Fri, 09 Apr 2010) $
+  Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $
 </address>
 
 </body>
diff --git a/docs/CodingStandards.html b/docs/CodingStandards.html
index 8dc5678..bb88a91 100644
--- a/docs/CodingStandards.html
+++ b/docs/CodingStandards.html
@@ -1346,7 +1346,7 @@ something.</p>
 
   <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
   <a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-02-26 21:18:32 +0100 (Fri, 26 Feb 2010) $
+  Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $
 </address>
 
 </body>
diff --git a/docs/CommandGuide/index.html b/docs/CommandGuide/index.html
index d0212b9..62cb776 100644
--- a/docs/CommandGuide/index.html
+++ b/docs/CommandGuide/index.html
@@ -148,7 +148,7 @@ options) arguments to the tool you are interested in.</p>
   src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
 
   <a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-02-18 15:08:13 +0100 (Thu, 18 Feb 2010) $
+  Last modified: $Date: 2010-05-11 18:47:42 +0200 (Tue, 11 May 2010) $
 </address>
 
 </body>
diff --git a/docs/CommandGuide/lit.pod b/docs/CommandGuide/lit.pod
index 72d9d2b..989a5d7 100644
--- a/docs/CommandGuide/lit.pod
+++ b/docs/CommandGuide/lit.pod
@@ -253,8 +253,8 @@ files passed on the command line. You can use B<--show-suites> to display the
 discovered test suites at startup.
 
 Once a test suite is discovered, its config file is loaded. Config files
-themselves are just Python modules which will be executed. When the config file
-is executed, two important global variables are predefined:
+themselves are Python modules which will be executed. When the config file is
+executed, two important global variables are predefined:
 
 =over
 
diff --git a/docs/CommandLine.html b/docs/CommandLine.html
index 092fd2b..2e5b3a2 100644
--- a/docs/CommandLine.html
+++ b/docs/CommandLine.html
@@ -1972,7 +1972,7 @@ tutorial.</p>
 
   <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
   <a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-02-26 21:18:32 +0100 (Fri, 26 Feb 2010) $
+  Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $
 </address>
 
 </body>
diff --git a/docs/CompilerDriver.html b/docs/CompilerDriver.html
index a0eed43..c63e174 100644
--- a/docs/CompilerDriver.html
+++ b/docs/CompilerDriver.html
@@ -748,7 +748,7 @@ the <tt class="docutils literal"><span class="pre">Base</span></tt> plugin behav
 <a href="mailto:foldr@codedgers.com">Mikhail Glushenkov</a><br />
 <a href="http://llvm.org">LLVM Compiler Infrastructure</a><br />
 
-Last modified: $Date: 2010-02-18 15:08:13 +0100 (Thu, 18 Feb 2010) $
+Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $
 </address></div>
 </div>
 </div>
diff --git a/docs/CompilerWriterInfo.html b/docs/CompilerWriterInfo.html
index 00ba696..6cd9d7d 100644
--- a/docs/CompilerWriterInfo.html
+++ b/docs/CompilerWriterInfo.html
@@ -256,7 +256,7 @@ processors.</li>
 
   <a href="http://misha.brukman.net">Misha Brukman</a><br>
   <a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2008-12-11 18:34:48 +0100 (Thu, 11 Dec 2008) $
+  Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $
 </address>
 
 </body>
diff --git a/docs/DeveloperPolicy.html b/docs/DeveloperPolicy.html
index 1f9451d..37bfb89 100644
--- a/docs/DeveloperPolicy.html
+++ b/docs/DeveloperPolicy.html
@@ -601,7 +601,7 @@ Changes</a></div>
   Written by the 
   <a href="mailto:llvm-oversight@cs.uiuc.edu">LLVM Oversight Group</a><br>
   <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-02-26 21:18:32 +0100 (Fri, 26 Feb 2010) $
+  Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $
 </address>
 </body>
 </html>
diff --git a/docs/ExceptionHandling.html b/docs/ExceptionHandling.html
index dfe7ee8..2c85574 100644
--- a/docs/ExceptionHandling.html
+++ b/docs/ExceptionHandling.html
@@ -492,6 +492,26 @@
 
 <!-- ======================================================================= -->
 <div class="doc_subsubsection">
+  <a name="llvm_eh_sjlj_longjmp">llvm.eh.sjlj.longjmp</a>
+</div>
+
+<div class="doc_text">
+
+<pre>
+  void %<a href="#llvm_eh_sjlj_longjmp">llvm.eh.sjlj.setjmp</a>(i8*)
+</pre>
+
+<p>The <a href="#llvm_eh_sjlj_longjmp"><tt>llvm.eh.sjlj.longjmp</tt></a>
+   intrinsic is used to implement <tt>__builtin_longjmp()</tt> for SJLJ
+   style exception handling. The single parameter is a pointer to a
+   buffer populated by <a href="#llvm_eh_sjlj_setjmp">
+     <tt>llvm.eh.sjlj.setjmp</tt></a>. The frame pointer and stack pointer
+   are restored from the buffer, then control is transfered to the
+   destination address.</p>
+
+</div>
+<!-- ======================================================================= -->
+<div class="doc_subsubsection">
   <a name="llvm_eh_sjlj_lsda">llvm.eh.sjlj.lsda</a>
 </div>
 
@@ -599,7 +619,7 @@
 
   <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
   <a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-01-28 02:45:32 +0100 (Thu, 28 Jan 2010) $
+  Last modified: $Date: 2010-05-26 18:21:41 +0200 (Wed, 26 May 2010) $
 </address>
 
 </body>
diff --git a/docs/ExtendingLLVM.html b/docs/ExtendingLLVM.html
index 8351f1a..2237560 100644
--- a/docs/ExtendingLLVM.html
+++ b/docs/ExtendingLLVM.html
@@ -384,7 +384,7 @@ void calcTypeName(const Type *Ty,
 
   <a href="http://llvm.org">The LLVM Compiler Infrastructure</a>
   <br>
-  Last modified: $Date: 2008-12-11 19:23:24 +0100 (Thu, 11 Dec 2008) $
+  Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $
 </address>
 
 </body>
diff --git a/docs/FAQ.html b/docs/FAQ.html
index eb162f1..6e560f5 100644
--- a/docs/FAQ.html
+++ b/docs/FAQ.html
@@ -931,7 +931,7 @@ F.i:
   src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
 
   <a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-05-04 20:16:00 +0200 (Tue, 04 May 2010) $
+  Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $
 </address>
 
 </body>
diff --git a/docs/GCCFEBuildInstrs.html b/docs/GCCFEBuildInstrs.html
index 873faa6..f4eccf79 100644
--- a/docs/GCCFEBuildInstrs.html
+++ b/docs/GCCFEBuildInstrs.html
@@ -272,7 +272,7 @@ More information is <a href="FAQ.html#license">available in the FAQ</a>.
   src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
 
   <a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2009-07-05 14:01:44 +0200 (Sun, 05 Jul 2009) $
+  Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $
 </address>
 
 </body>
diff --git a/docs/GarbageCollection.html b/docs/GarbageCollection.html
index a372f69..fde070c 100644
--- a/docs/GarbageCollection.html
+++ b/docs/GarbageCollection.html
@@ -617,7 +617,7 @@ conforms to the binary interface defined by library, most essentially the
 using namespace llvm;
 
 namespace {
-  class VISIBILITY_HIDDEN MyGC : public GCStrategy {
+  class LLVM_LIBRARY_VISIBILITY MyGC : public GCStrategy {
   public:
     MyGC() {}
   };
@@ -1229,7 +1229,7 @@ generation in the JIT, nor using the object writers.</p>
 using namespace llvm;
 
 namespace {
-  class VISIBILITY_HIDDEN MyGCPrinter : public GCMetadataPrinter {
+  class LLVM_LIBRARY_VISIBILITY MyGCPrinter : public GCMetadataPrinter {
   public:
     virtual void beginAssembly(std::ostream &amp;OS, AsmPrinter &amp;AP,
                                const TargetAsmInfo &amp;TAI);
@@ -1380,7 +1380,7 @@ Fergus Henderson. International Symposium on Memory Management 2002.</p>
 
   <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
   <a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2009-08-05 17:42:44 +0200 (Wed, 05 Aug 2009) $
+  Last modified: $Date: 2010-05-11 22:16:09 +0200 (Tue, 11 May 2010) $
 </address>
 
 </body>
diff --git a/docs/GetElementPtr.html b/docs/GetElementPtr.html
index f6dd71f..9e1c8b8 100644
--- a/docs/GetElementPtr.html
+++ b/docs/GetElementPtr.html
@@ -728,7 +728,7 @@ idx3 = (char*) &amp;MyVar + 8
   <a href="http://validator.w3.org/check/referer"><img
   src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
   <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br/>
-  Last modified: $Date: 2010-02-25 19:16:03 +0100 (Thu, 25 Feb 2010) $
+  Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $
 </address>
 </body>
 </html>
diff --git a/docs/GettingStarted.html b/docs/GettingStarted.html
index d301e4c..7103d13 100644
--- a/docs/GettingStarted.html
+++ b/docs/GettingStarted.html
@@ -452,8 +452,8 @@ href="GCCFEBuildInstrs.html">try to compile it</a> on your platform.</p>
     <li><a name="sf3">Only needed if you want to run the automated test 
       suite in the <tt>llvm/test</tt> directory.</a></li>
     <li><a name="sf4">If you want to make changes to the configure scripts, 
-      you will need GNU autoconf (2.59), and consequently, GNU M4 (version 1.4 
-      or higher). You will also need automake (1.9.2). We only use aclocal 
+      you will need GNU autoconf (2.60), and consequently, GNU M4 (version 1.4 
+      or higher). You will also need automake (1.9.6). We only use aclocal 
       from that package.</a></li>
   </ol>
   </div>
@@ -1673,7 +1673,7 @@ out:</p>
   <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
   <a href="http://llvm.x10sys.com/rspencer/">Reid Spencer</a><br>
   <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-02-18 15:08:13 +0100 (Thu, 18 Feb 2010) $
+  Last modified: $Date: 2010-05-19 09:00:17 +0200 (Wed, 19 May 2010) $
 </address>
 </body>
 </html>
diff --git a/docs/GettingStartedVS.html b/docs/GettingStartedVS.html
index c002450..e467e08 100644
--- a/docs/GettingStartedVS.html
+++ b/docs/GettingStartedVS.html
@@ -411,7 +411,7 @@ out:</p>
 
   <a href="mailto:jeffc@jolt-lang.org">Jeff Cohen</a><br>
   <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2009-08-05 17:42:44 +0200 (Wed, 05 Aug 2009) $
+  Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $
 </address>
 </body>
 </html>
diff --git a/docs/HowToReleaseLLVM.html b/docs/HowToReleaseLLVM.html
index ae0b133..b966f45 100644
--- a/docs/HowToReleaseLLVM.html
+++ b/docs/HowToReleaseLLVM.html
@@ -517,7 +517,7 @@ svn copy https://llvm.org/svn/llvm-project/test-suite/branches/release_XX \
   src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
   <a href="http://llvm.cs.uiuc.edu">The LLVM Compiler Infrastructure</a>
   <br>
-  Last modified: $Date: 2010-04-05 20:35:37 +0200 (Mon, 05 Apr 2010) $
+  Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $
 </address>
 </body>
 </html>
diff --git a/docs/HowToSubmitABug.html b/docs/HowToSubmitABug.html
index 5cf935d..64c6141 100644
--- a/docs/HowToSubmitABug.html
+++ b/docs/HowToSubmitABug.html
@@ -340,7 +340,7 @@ the following:</p>
   <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
   <a href="http://llvm.org">The LLVM Compiler Infrastructure</a>
   <br>
-  Last modified: $Date: 2010-05-02 17:36:26 +0200 (Sun, 02 May 2010) $
+  Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $
 </address>
 
 </body>
diff --git a/docs/LangRef.html b/docs/LangRef.html
index 3ee8de7..119436a 100644
--- a/docs/LangRef.html
+++ b/docs/LangRef.html
@@ -224,7 +224,7 @@
           <li><a href="#int_stackrestore">'<tt>llvm.stackrestore</tt>' Intrinsic</a></li>
           <li><a href="#int_prefetch">'<tt>llvm.prefetch</tt>' Intrinsic</a></li>
           <li><a href="#int_pcmarker">'<tt>llvm.pcmarker</tt>' Intrinsic</a></li>
-          <li><a href="#int_readcyclecounter"><tt>llvm.readcyclecounter</tt>' Intrinsic</a></li>
+          <li><a href="#int_readcyclecounter">'<tt>llvm.readcyclecounter</tt>' Intrinsic</a></li>
         </ol>
       </li>
       <li><a href="#int_libc">Standard C Library Intrinsics</a>
@@ -7217,10 +7217,10 @@ LLVM</a>.</p>
    any integer bit width. Not all targets support all bit widths however.</p>
 
 <pre>
-  declare i8 @llvm.atomic.load.add.i8..p0i8( i8* &lt;ptr&gt;, i8 &lt;delta&gt; )
-  declare i16 @llvm.atomic.load.add.i16..p0i16( i16* &lt;ptr&gt;, i16 &lt;delta&gt; )
-  declare i32 @llvm.atomic.load.add.i32..p0i32( i32* &lt;ptr&gt;, i32 &lt;delta&gt; )
-  declare i64 @llvm.atomic.load.add.i64..p0i64( i64* &lt;ptr&gt;, i64 &lt;delta&gt; )
+  declare i8 @llvm.atomic.load.add.i8.p0i8( i8* &lt;ptr&gt;, i8 &lt;delta&gt; )
+  declare i16 @llvm.atomic.load.add.i16.p0i16( i16* &lt;ptr&gt;, i16 &lt;delta&gt; )
+  declare i32 @llvm.atomic.load.add.i32.p0i32( i32* &lt;ptr&gt;, i32 &lt;delta&gt; )
+  declare i64 @llvm.atomic.load.add.i64.p0i64( i64* &lt;ptr&gt;, i64 &lt;delta&gt; )
 </pre>
 
 <h5>Overview:</h5>
@@ -7412,8 +7412,8 @@ LLVM</a>.</p>
 <pre>
   declare i8 @llvm.atomic.load.min.i8.p0i8( i8* &lt;ptr&gt;, i8 &lt;delta&gt; )
   declare i16 @llvm.atomic.load.min.i16.p0i16( i16* &lt;ptr&gt;, i16 &lt;delta&gt; )
-  declare i32 @llvm.atomic.load.min.i32..p0i32( i32* &lt;ptr&gt;, i32 &lt;delta&gt; )
-  declare i64 @llvm.atomic.load.min.i64..p0i64( i64* &lt;ptr&gt;, i64 &lt;delta&gt; )
+  declare i32 @llvm.atomic.load.min.i32.p0i32( i32* &lt;ptr&gt;, i32 &lt;delta&gt; )
+  declare i64 @llvm.atomic.load.min.i64.p0i64( i64* &lt;ptr&gt;, i64 &lt;delta&gt; )
 </pre>
 
 <pre>
@@ -7424,10 +7424,10 @@ LLVM</a>.</p>
 </pre>
 
 <pre>
-  declare i8 @llvm.atomic.load.umin.i8..p0i8( i8* &lt;ptr&gt;, i8 &lt;delta&gt; )
+  declare i8 @llvm.atomic.load.umin.i8.p0i8( i8* &lt;ptr&gt;, i8 &lt;delta&gt; )
   declare i16 @llvm.atomic.load.umin.i16.p0i16( i16* &lt;ptr&gt;, i16 &lt;delta&gt; )
-  declare i32 @llvm.atomic.load.umin.i32..p0i32( i32* &lt;ptr&gt;, i32 &lt;delta&gt; )
-  declare i64 @llvm.atomic.load.umin.i64..p0i64( i64* &lt;ptr&gt;, i64 &lt;delta&gt; )
+  declare i32 @llvm.atomic.load.umin.i32.p0i32( i32* &lt;ptr&gt;, i32 &lt;delta&gt; )
+  declare i64 @llvm.atomic.load.umin.i64.p0i64( i64* &lt;ptr&gt;, i64 &lt;delta&gt; )
 </pre>
 
 <h5>Overview:</h5>
@@ -7773,7 +7773,7 @@ LLVM</a>.</p>
 
   <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
   <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-05-03 16:59:34 +0200 (Mon, 03 May 2010) $
+  Last modified: $Date: 2010-05-26 23:56:15 +0200 (Wed, 26 May 2010) $
 </address>
 
 </body>
diff --git a/docs/Lexicon.html b/docs/Lexicon.html
index 576b732..09fae39 100644
--- a/docs/Lexicon.html
+++ b/docs/Lexicon.html
@@ -269,7 +269,7 @@ href="http://www.program-transformation.org/Transform/BURG">BURG</a> tool.</dd>
  src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a><a
  href="http://llvm.org/">The LLVM Team</a><br>
 <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
-Last modified: $Date: 2010-04-05 07:48:47 +0200 (Mon, 05 Apr 2010) $
+Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $
 </address>
 <!-- vim: sw=2
 -->
diff --git a/docs/LinkTimeOptimization.html b/docs/LinkTimeOptimization.html
index c6a2399..03dc677 100644
--- a/docs/LinkTimeOptimization.html
+++ b/docs/LinkTimeOptimization.html
@@ -382,7 +382,7 @@ of the native object files.</p>
 
   Devang Patel and Nick Kledzik<br>
   <a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-04-25 00:01:40 +0200 (Sun, 25 Apr 2010) $
+  Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $
 </address>
 
 </body>
diff --git a/docs/MakefileGuide.html b/docs/MakefileGuide.html
index eb91a21..2e2fdc5 100644
--- a/docs/MakefileGuide.html
+++ b/docs/MakefileGuide.html
@@ -1025,7 +1025,7 @@
 
   <a href="mailto:rspencer@x10sys.com">Reid Spencer</a><br>
   <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-02-23 11:00:53 +0100 (Tue, 23 Feb 2010) $
+  Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $
 </address>
 </body>
 </html>
diff --git a/docs/Packaging.html b/docs/Packaging.html
index 678edb0..7aa4a1c 100644
--- a/docs/Packaging.html
+++ b/docs/Packaging.html
@@ -112,7 +112,7 @@ line numbers.</dd>
   <a href="http://validator.w3.org/check/referer"><img
   src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
   <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-02-26 23:25:06 +0100 (Fri, 26 Feb 2010) $
+  Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $
 </address>
 </body>
 </html>
diff --git a/docs/Passes.html b/docs/Passes.html
index 5d8120c..c239e44 100644
--- a/docs/Passes.html
+++ b/docs/Passes.html
@@ -85,6 +85,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "  <p>\n" if !
 <tr><td><a href="#dot-callgraph">-dot-callgraph</a></td><td>Print Call Graph to 'dot' file</td></tr>
 <tr><td><a href="#dot-cfg">-dot-cfg</a></td><td>Print CFG of function to 'dot' file</td></tr>
 <tr><td><a href="#dot-cfg-only">-dot-cfg-only</a></td><td>Print CFG of function to 'dot' file (with no function bodies)</td></tr>
+<tr><td><a href="#dot-dom">-dot-dom</a></td><td>Print dominator tree of function to 'dot' file</td></tr>
+<tr><td><a href="#dot-dom-only">-dot-dom-only</a></td><td>Print dominator tree of function to 'dot' file (with no function bodies)</td></tr>
+<tr><td><a href="#dot-postdom">-dot-postdom</a></td><td>Print post dominator tree of function to 'dot' file</td></tr>
+<tr><td><a href="#dot-postdom-only">-dot-postdom-only</a></td><td>Print post dominator tree of function to 'dot' file (with no function bodies)</td></tr>
 <tr><td><a href="#globalsmodref-aa">-globalsmodref-aa</a></td><td>Simple mod/ref analysis for globals</td></tr>
 <tr><td><a href="#instcount">-instcount</a></td><td>Counts the various types of Instructions</td></tr>
 <tr><td><a href="#intervals">-intervals</a></td><td>Interval Partition Construction</td></tr>
@@ -177,6 +181,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "  <p>\n" if !
 <tr><td><a href="#verify">-verify</a></td><td>Module Verifier</td></tr>
 <tr><td><a href="#view-cfg">-view-cfg</a></td><td>View CFG of function</td></tr>
 <tr><td><a href="#view-cfg-only">-view-cfg-only</a></td><td>View CFG of function (with no function bodies)</td></tr>
+<tr><td><a href="#view-dom">-view-dom</a></td><td>View dominator tree of function</td></tr>
+<tr><td><a href="#view-dom-only">-view-dom-only</a></td><td>View dominator tree of function (with no function bodies)</td></tr>
+<tr><td><a href="#view-postdom">-view-postdom</a></td><td>View post dominator tree of function</td></tr>
+<tr><td><a href="#view-postdom-only">-view-postdom-only</a></td><td>View post dominator tree of function (with no function bodies)</td></tr>
 </table>
 </div>
 
@@ -321,6 +329,58 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "  <p>\n" if !
 
 <!-------------------------------------------------------------------------- -->
 <div class="doc_subsection">
+  <a name="dot-dom">Print dominator tree of function to 'dot' file</a>
+</div>
+<div class="doc_text">
+  <p>
+  This pass, only available in <code>opt</code>, prints the dominator tree
+  into a <code>.dot</code> graph.  This graph can then be processed with the
+  "dot" tool to convert it to postscript or some other suitable format.
+  </p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+  <a name="dot-dom-only">Print dominator tree of function to 'dot' file (with no
+  function bodies)</a>
+</div>
+<div class="doc_text">
+  <p>
+  This pass, only available in <code>opt</code>, prints the dominator tree
+  into a <code>.dot</code> graph, omitting the function bodies.  This graph can
+  then be processed with the "dot" tool to convert it to postscript or some
+  other suitable format.
+  </p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+  <a name="dot-postdom">Print post dominator tree of function to 'dot' file</a>
+</div>
+<div class="doc_text">
+  <p>
+  This pass, only available in <code>opt</code>, prints the post dominator tree
+  into a <code>.dot</code> graph.  This graph can then be processed with the
+  "dot" tool to convert it to postscript or some other suitable format.
+  </p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+  <a name="dot-postdom-only">Print post dominator tree of function to 'dot' file
+  (with no function bodies)</a>
+</div>
+<div class="doc_text">
+  <p>
+  This pass, only available in <code>opt</code>, prints the post dominator tree
+  into a <code>.dot</code> graph, omitting the function bodies.  This graph can
+  then be processed with the "dot" tool to convert it to postscript or some
+  other suitable format.
+  </p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
   <a name="globalsmodref-aa">Simple mod/ref analysis for globals</a>
 </div>
 <div class="doc_text">
@@ -1752,6 +1812,52 @@ if (X &lt; 3) {</pre>
   </p>
 </div>
 
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+  <a name="view-dom">View dominator tree of function</a>
+</div>
+<div class="doc_text">
+  <p>
+  Displays the dominator tree using the GraphViz tool.
+  </p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+  <a name="view-dom-only">View dominator tree of function (with no function
+  bodies)
+  </a>
+</div>
+<div class="doc_text">
+  <p>
+  Displays the dominator tree using the GraphViz tool, but omitting function
+  bodies.
+  </p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+  <a name="view-postdom">View post dominator tree of function</a>
+</div>
+<div class="doc_text">
+  <p>
+  Displays the post dominator tree using the GraphViz tool.
+  </p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+  <a name="view-postdom-only">View post dominator tree of function (with no
+  function bodies)
+  </a>
+</div>
+<div class="doc_text">
+  <p>
+  Displays the post dominator tree using the GraphViz tool, but omitting
+  function bodies.
+  </p>
+</div>
+
 <!-- *********************************************************************** -->
 
 <hr>
@@ -1763,7 +1869,7 @@ if (X &lt; 3) {</pre>
 
   <a href="mailto:rspencer@x10sys.com">Reid Spencer</a><br>
   <a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-04-22 22:48:34 +0200 (Thu, 22 Apr 2010) $
+  Last modified: $Date: 2010-05-07 11:33:18 +0200 (Fri, 07 May 2010) $
 </address>
 
 </body>
diff --git a/docs/ProgrammersManual.html b/docs/ProgrammersManual.html
index 23cfbf6..ed6a2b7 100644
--- a/docs/ProgrammersManual.html
+++ b/docs/ProgrammersManual.html
@@ -3943,7 +3943,7 @@ arguments. An argument has a pointer to the parent Function.</p>
   <a href="mailto:dhurjati@cs.uiuc.edu">Dinakar Dhurjati</a> and
   <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
   <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-04-02 02:08:26 +0200 (Fri, 02 Apr 2010) $
+  Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $
 </address>
 
 </body>
diff --git a/docs/Projects.html b/docs/Projects.html
index 582c4e2..76da086 100644
--- a/docs/Projects.html
+++ b/docs/Projects.html
@@ -453,7 +453,7 @@ Mailing List</a>.</p>
   <a href="mailto:criswell@uiuc.edu">John Criswell</a><br>
   <a href="http://llvm.org">The LLVM Compiler Infrastructure</a>
   <br>
-  Last modified: $Date: 2009-08-13 22:08:52 +0200 (Thu, 13 Aug 2009) $
+  Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $
 </address>
 
 </body>
diff --git a/docs/ReleaseNotes.html b/docs/ReleaseNotes.html
index 5422a93..a5a35dd 100644
--- a/docs/ReleaseNotes.html
+++ b/docs/ReleaseNotes.html
@@ -1230,7 +1230,7 @@ lists</a>.</p>
   src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
 
   <a href="http://llvm.org/">LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-05-04 01:52:21 +0200 (Tue, 04 May 2010) $
+  Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $
 </address>
 
 </body>
diff --git a/docs/SourceLevelDebugging.html b/docs/SourceLevelDebugging.html
index 9a4a6f1..e017530 100644
--- a/docs/SourceLevelDebugging.html
+++ b/docs/SourceLevelDebugging.html
@@ -1773,7 +1773,7 @@ enum Trees {
 
   <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
   <a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-04-05 06:11:11 +0200 (Mon, 05 Apr 2010) $
+  Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $
 </address>
 
 </body>
diff --git a/docs/SystemLibrary.html b/docs/SystemLibrary.html
index aca7e5d..b81b1a8 100644
--- a/docs/SystemLibrary.html
+++ b/docs/SystemLibrary.html
@@ -313,7 +313,7 @@
 
   <a href="mailto:rspencer@x10sys.com">Reid Spencer</a><br>
   <a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2009-07-17 23:11:24 +0200 (Fri, 17 Jul 2009) $
+  Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $
 </address>
 </body>
 </html>
diff --git a/docs/TableGenFundamentals.html b/docs/TableGenFundamentals.html
index 388a090..4ad6191 100644
--- a/docs/TableGenFundamentals.html
+++ b/docs/TableGenFundamentals.html
@@ -795,7 +795,7 @@ This should highlight the APIs in <tt>TableGen/Record.h</tt>.</p>
 
   <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
   <a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-04-22 18:45:27 +0200 (Thu, 22 Apr 2010) $
+  Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $
 </address>
 
 </body>
diff --git a/docs/TestingGuide.html b/docs/TestingGuide.html
index 76d6de0..3149f46 100644
--- a/docs/TestingGuide.html
+++ b/docs/TestingGuide.html
@@ -1206,7 +1206,7 @@ know. Thanks!</p>
 
   John T. Criswell, Reid Spencer, and Tanya Lattner<br>
   <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-02-26 22:23:59 +0100 (Fri, 26 Feb 2010) $
+  Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $
 </address>
 </body>
 </html>
diff --git a/docs/UsingLibraries.html b/docs/UsingLibraries.html
index c6bcaf0..e7a1d3d 100644
--- a/docs/UsingLibraries.html
+++ b/docs/UsingLibraries.html
@@ -432,7 +432,7 @@
   <a href="mailto:rspencer@x10sys.com">Reid Spencer</a>
 </address>
 <a href="http://llvm.org">The LLVM Compiler Infrastructure</a> 
-<br>Last modified: $Date: 2009-07-24 02:30:09 +0200 (Fri, 24 Jul 2009) $ </div>
+<br>Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $ </div>
 </body>
 </html>
 <!-- vim: sw=2 ts=2 ai
diff --git a/docs/WritingAnLLVMBackend.html b/docs/WritingAnLLVMBackend.html
index e513cea..0b9dd9f 100644
--- a/docs/WritingAnLLVMBackend.html
+++ b/docs/WritingAnLLVMBackend.html
@@ -2554,7 +2554,7 @@ with assembler.
   <a href="http://www.woo.com">Mason Woo</a> and <a href="http://misha.brukman.net">Misha Brukman</a><br>
   <a href="http://llvm.org">The LLVM Compiler Infrastructure</a>
   <br>
-  Last modified: $Date: 2010-04-05 06:11:11 +0200 (Mon, 05 Apr 2010) $
+  Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $
 </address>
 
 </body>
diff --git a/docs/WritingAnLLVMPass.html b/docs/WritingAnLLVMPass.html
index e592da4..e353c2e 100644
--- a/docs/WritingAnLLVMPass.html
+++ b/docs/WritingAnLLVMPass.html
@@ -1835,7 +1835,7 @@ Despite that, we have kept the LLVM passes SMP ready, and you should too.</p>
 
   <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
   <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-04-17 01:07:44 +0200 (Sat, 17 Apr 2010) $
+  Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $
 </address>
 
 </body>
diff --git a/docs/index.html b/docs/index.html
index cd43f88..74c39f6 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -287,7 +287,7 @@ times each day, making it a high volume list.</li>
   src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
 
   <a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-02-26 01:54:42 +0100 (Fri, 26 Feb 2010) $
+  Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $
 </address>
 </body></html>
 
diff --git a/docs/tutorial/LangImpl1.html b/docs/tutorial/LangImpl1.html
index 9f4737d..c256af4 100644
--- a/docs/tutorial/LangImpl1.html
+++ b/docs/tutorial/LangImpl1.html
@@ -342,7 +342,7 @@ so that you can use the lexer and parser together.
 
   <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
   <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-02-03 18:27:31 +0100 (Wed, 03 Feb 2010) $
+  Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $
 </address>
 </body>
 </html>
diff --git a/docs/tutorial/LangImpl2.html b/docs/tutorial/LangImpl2.html
index f06db16..f39ed6c 100644
--- a/docs/tutorial/LangImpl2.html
+++ b/docs/tutorial/LangImpl2.html
@@ -1227,7 +1227,7 @@ int main() {
 
   <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
   <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-02-03 18:27:31 +0100 (Wed, 03 Feb 2010) $
+  Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $
 </address>
 </body>
 </html>
diff --git a/docs/tutorial/LangImpl3.html b/docs/tutorial/LangImpl3.html
index 80c9fc2..602fd7d 100644
--- a/docs/tutorial/LangImpl3.html
+++ b/docs/tutorial/LangImpl3.html
@@ -1263,7 +1263,7 @@ int main() {
 
   <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
   <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-03-02 02:11:08 +0100 (Tue, 02 Mar 2010) $
+  Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $
 </address>
 </body>
 </html>
diff --git a/docs/tutorial/LangImpl4.html b/docs/tutorial/LangImpl4.html
index 4520c46..aaec2b6 100644
--- a/docs/tutorial/LangImpl4.html
+++ b/docs/tutorial/LangImpl4.html
@@ -1126,7 +1126,7 @@ int main() {
 
   <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
   <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-03-02 02:11:08 +0100 (Tue, 02 Mar 2010) $
+  Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $
 </address>
 </body>
 </html>
diff --git a/docs/tutorial/LangImpl5.html b/docs/tutorial/LangImpl5.html
index 3256c40..6f1f9df 100644
--- a/docs/tutorial/LangImpl5.html
+++ b/docs/tutorial/LangImpl5.html
@@ -1771,7 +1771,7 @@ int main() {
 
   <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
   <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-03-02 02:11:08 +0100 (Tue, 02 Mar 2010) $
+  Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $
 </address>
 </body>
 </html>
diff --git a/docs/tutorial/LangImpl6.html b/docs/tutorial/LangImpl6.html
index f70bc50..47f08dc 100644
--- a/docs/tutorial/LangImpl6.html
+++ b/docs/tutorial/LangImpl6.html
@@ -1808,7 +1808,7 @@ int main() {
 
   <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
   <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-02-11 20:15:20 +0100 (Thu, 11 Feb 2010) $
+  Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $
 </address>
 </body>
 </html>
diff --git a/docs/tutorial/LangImpl7.html b/docs/tutorial/LangImpl7.html
index 3d24739..ddd5a9b 100644
--- a/docs/tutorial/LangImpl7.html
+++ b/docs/tutorial/LangImpl7.html
@@ -2158,7 +2158,7 @@ int main() {
 
   <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
   <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-03-02 02:11:08 +0100 (Tue, 02 Mar 2010) $
+  Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $
 </address>
 </body>
 </html>
diff --git a/docs/tutorial/LangImpl8.html b/docs/tutorial/LangImpl8.html
index 5effd70..fe42a22 100644
--- a/docs/tutorial/LangImpl8.html
+++ b/docs/tutorial/LangImpl8.html
@@ -359,7 +359,7 @@ Passing Style</a> and the use of tail calls (which LLVM also supports).</p>
 
   <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
   <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-02-03 18:27:31 +0100 (Wed, 03 Feb 2010) $
+  Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $
 </address>
 </body>
 </html>
diff --git a/docs/tutorial/OCamlLangImpl1.html b/docs/tutorial/OCamlLangImpl1.html
index f3345aa..4410613 100644
--- a/docs/tutorial/OCamlLangImpl1.html
+++ b/docs/tutorial/OCamlLangImpl1.html
@@ -359,7 +359,7 @@ include a driver so that you can use the lexer and parser together.
   <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
   <a href="mailto:idadesub@users.sourceforge.net">Erick Tryzelaar</a><br>
   <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-02-03 18:27:31 +0100 (Wed, 03 Feb 2010) $
+  Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $
 </address>
 </body>
 </html>
diff --git a/docs/tutorial/OCamlLangImpl2.html b/docs/tutorial/OCamlLangImpl2.html
index a59e829..41d0956 100644
--- a/docs/tutorial/OCamlLangImpl2.html
+++ b/docs/tutorial/OCamlLangImpl2.html
@@ -1039,7 +1039,7 @@ main ()
   <a href="mailto:sabre@nondot.org">Chris Lattner</a>
   <a href="mailto:erickt@users.sourceforge.net">Erick Tryzelaar</a><br>
   <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-02-03 18:27:31 +0100 (Wed, 03 Feb 2010) $
+  Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $
 </address>
 </body>
 </html>
diff --git a/docs/tutorial/OCamlLangImpl3.html b/docs/tutorial/OCamlLangImpl3.html
index 01fe62f..06dded1 100644
--- a/docs/tutorial/OCamlLangImpl3.html
+++ b/docs/tutorial/OCamlLangImpl3.html
@@ -1087,7 +1087,7 @@ main ()
   <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
   <a href="mailto:idadesub@users.sourceforge.net">Erick Tryzelaar</a><br>
   <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-03-08 20:32:18 +0100 (Mon, 08 Mar 2010) $
+  Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $
 </address>
 </body>
 </html>
diff --git a/docs/tutorial/OCamlLangImpl4.html b/docs/tutorial/OCamlLangImpl4.html
index 6d9a6c6..3277e10 100644
--- a/docs/tutorial/OCamlLangImpl4.html
+++ b/docs/tutorial/OCamlLangImpl4.html
@@ -1023,7 +1023,7 @@ extern double putchard(double X) {
   <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
   <a href="mailto:idadesub@users.sourceforge.net">Erick Tryzelaar</a><br>
   <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-03-08 20:32:18 +0100 (Mon, 08 Mar 2010) $
+  Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $
 </address>
 </body>
 </html>
diff --git a/docs/tutorial/OCamlLangImpl5.html b/docs/tutorial/OCamlLangImpl5.html
index b0440ac..7194a02 100644
--- a/docs/tutorial/OCamlLangImpl5.html
+++ b/docs/tutorial/OCamlLangImpl5.html
@@ -1563,7 +1563,7 @@ operators</a>
   <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
   <a href="mailto:idadesub@users.sourceforge.net">Erick Tryzelaar</a><br>
   <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-03-08 20:32:18 +0100 (Mon, 08 Mar 2010) $
+  Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $
 </address>
 </body>
 </html>
diff --git a/docs/tutorial/OCamlLangImpl6.html b/docs/tutorial/OCamlLangImpl6.html
index c10d5bb..f365e2d 100644
--- a/docs/tutorial/OCamlLangImpl6.html
+++ b/docs/tutorial/OCamlLangImpl6.html
@@ -1568,7 +1568,7 @@ SSA construction</a>
   <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
   <a href="mailto:idadesub@users.sourceforge.net">Erick Tryzelaar</a><br>
   <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-03-22 00:15:13 +0100 (Mon, 22 Mar 2010) $
+  Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $
 </address>
 </body>
 </html>
diff --git a/docs/tutorial/OCamlLangImpl7.html b/docs/tutorial/OCamlLangImpl7.html
index c360f2a..8f95389 100644
--- a/docs/tutorial/OCamlLangImpl7.html
+++ b/docs/tutorial/OCamlLangImpl7.html
@@ -1901,7 +1901,7 @@ extern double printd(double X) {
   <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
   <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
   <a href="mailto:idadesub@users.sourceforge.net">Erick Tryzelaar</a><br>
-  Last modified: $Date: 2010-03-22 00:15:13 +0100 (Mon, 22 Mar 2010) $
+  Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $
 </address>
 </body>
 </html>
diff --git a/include/llvm/ADT/DenseMap.h b/include/llvm/ADT/DenseMap.h
index 393473bd..5c99473 100644
--- a/include/llvm/ADT/DenseMap.h
+++ b/include/llvm/ADT/DenseMap.h
@@ -79,13 +79,14 @@ public:
   typedef DenseMapIterator<KeyT, ValueT,
                            KeyInfoT, ValueInfoT, true> const_iterator;
   inline iterator begin() {
-     return iterator(Buckets, Buckets+NumBuckets);
+    // When the map is empty, avoid the overhead of AdvancePastEmptyBuckets().
+    return empty() ? end() : iterator(Buckets, Buckets+NumBuckets);
   }
   inline iterator end() {
     return iterator(Buckets+NumBuckets, Buckets+NumBuckets);
   }
   inline const_iterator begin() const {
-    return const_iterator(Buckets, Buckets+NumBuckets);
+    return empty() ? end() : const_iterator(Buckets, Buckets+NumBuckets);
   }
   inline const_iterator end() const {
     return const_iterator(Buckets+NumBuckets, Buckets+NumBuckets);
diff --git a/include/llvm/ADT/EquivalenceClasses.h b/include/llvm/ADT/EquivalenceClasses.h
index 5f89823..91a1429 100644
--- a/include/llvm/ADT/EquivalenceClasses.h
+++ b/include/llvm/ADT/EquivalenceClasses.h
@@ -191,7 +191,7 @@ public:
   /// insert - Insert a new value into the union/find set, ignoring the request
   /// if the value already exists.
   iterator insert(const ElemTy &Data) {
-    return TheMapping.insert(Data).first;
+    return TheMapping.insert(ECValue(Data)).first;
   }
 
   /// findLeader - Given a value in the set, return a member iterator for the
diff --git a/include/llvm/ADT/SparseBitVector.h b/include/llvm/ADT/SparseBitVector.h
index 6c813ec..0862981 100644
--- a/include/llvm/ADT/SparseBitVector.h
+++ b/include/llvm/ADT/SparseBitVector.h
@@ -889,13 +889,17 @@ operator-(const SparseBitVector<ElementSize> &LHS,
 // Dump a SparseBitVector to a stream
 template <unsigned ElementSize>
 void dump(const SparseBitVector<ElementSize> &LHS, raw_ostream &out) {
-  out << "[ ";
-
-  typename SparseBitVector<ElementSize>::iterator bi;
-  for (bi = LHS.begin(); bi != LHS.end(); ++bi) {
-    out << *bi << " ";
+  out << "[";
+
+  typename SparseBitVector<ElementSize>::iterator bi = LHS.begin(),
+    be = LHS.end();
+  if (bi != be) {
+    out << *bi;
+    for (++bi; bi != be; ++bi) {
+      out << " " << *bi;
+    }
   }
-  out << " ]\n";
+  out << "]\n";
 }
 } // end namespace llvm
 
diff --git a/include/llvm/ADT/StringRef.h b/include/llvm/ADT/StringRef.h
index ab6358b..33756f6 100644
--- a/include/llvm/ADT/StringRef.h
+++ b/include/llvm/ADT/StringRef.h
@@ -128,6 +128,10 @@ namespace llvm {
     /// compare_lower - Compare two strings, ignoring case.
     int compare_lower(StringRef RHS) const;
 
+    /// compare_numeric - Compare two strings, treating sequences of digits as
+    /// numbers.
+    int compare_numeric(StringRef RHS) const;
+
     /// \brief Determine the edit distance between this string and another 
     /// string.
     ///
diff --git a/include/llvm/ADT/Twine.h b/include/llvm/ADT/Twine.h
index 97e9df4..b519a3e 100644
--- a/include/llvm/ADT/Twine.h
+++ b/include/llvm/ADT/Twine.h
@@ -99,11 +99,12 @@ namespace llvm {
       /// A pointer to a StringRef instance.
       StringRefKind,
 
-      /// A pointer to an unsigned int value, to render as an unsigned decimal
-      /// integer.
+      /// An unsigned int value reinterpreted as a pointer, to render as an 
+      /// unsigned decimal integer.
       DecUIKind,
 
-      /// A pointer to an int value, to render as a signed decimal integer.
+      /// An int value reinterpreted as a pointer, to render as a signed
+      /// decimal integer.
       DecIKind,
 
       /// A pointer to an unsigned long value, to render as an unsigned decimal
@@ -259,13 +260,13 @@ namespace llvm {
     }
 
     /// Construct a twine to print \arg Val as an unsigned decimal integer.
-    explicit Twine(const unsigned int &Val) 
-      : LHS(&Val), LHSKind(DecUIKind), RHSKind(EmptyKind) {
+    explicit Twine(unsigned Val) 
+      : LHS((void*)(intptr_t)Val), LHSKind(DecUIKind), RHSKind(EmptyKind) {
     }
 
     /// Construct a twine to print \arg Val as a signed decimal integer.
-    explicit Twine(const int &Val) 
-      : LHS(&Val), LHSKind(DecIKind), RHSKind(EmptyKind) {
+    explicit Twine(int Val) 
+      : LHS((void*)(intptr_t)Val), LHSKind(DecIKind), RHSKind(EmptyKind) {
     }
 
     /// Construct a twine to print \arg Val as an unsigned decimal integer.
diff --git a/include/llvm/ADT/ilist_node.h b/include/llvm/ADT/ilist_node.h
index da25f95..f008003 100644
--- a/include/llvm/ADT/ilist_node.h
+++ b/include/llvm/ADT/ilist_node.h
@@ -49,6 +49,56 @@ class ilist_node : private ilist_half_node<NodeTy> {
   void setNext(NodeTy *N) { Next = N; }
 protected:
   ilist_node() : Next(0) {}
+
+public:
+  /// @name Adjacent Node Accessors
+  /// @{
+
+  /// \brief Get the previous node, or 0 for the list head.
+  NodeTy *getPrevNode() {
+    NodeTy *Prev = this->getPrev();
+
+    // Check for sentinel.
+    if (!Prev->getNext())
+      return 0;
+
+    return Prev;
+  }
+
+  /// \brief Get the previous node, or 0 for the list head.
+  const NodeTy *getPrevNode() const {
+    const NodeTy *Prev = this->getPrev();
+
+    // Check for sentinel.
+    if (!Prev->getNext())
+      return 0;
+
+    return Prev;
+  }
+
+  /// \brief Get the next node, or 0 for the list tail.
+  NodeTy *getNextNode() {
+    NodeTy *Next = getNext();
+
+    // Check for sentinel.
+    if (!Next->getNext())
+      return 0;
+
+    return Next;
+  }
+
+  /// \brief Get the next node, or 0 for the list tail.
+  const NodeTy *getNextNode() const {
+    const NodeTy *Next = getNext();
+
+    // Check for sentinel.
+    if (!Next->getNext())
+      return 0;
+
+    return Next;
+  }
+
+  /// @}
 };
 
 } // End llvm namespace
diff --git a/include/llvm/Analysis/DebugInfo.h b/include/llvm/Analysis/DebugInfo.h
index 9b1379d..473b127 100644
--- a/include/llvm/Analysis/DebugInfo.h
+++ b/include/llvm/Analysis/DebugInfo.h
@@ -34,13 +34,14 @@ namespace llvm {
   class Instruction;
   class MDNode;
   class LLVMContext;
+  class raw_ostream;
 
   /// DIDescriptor - A thin wraper around MDNode to access encoded debug info.
   /// This should not be stored in a container, because underly MDNode may
   /// change in certain situations.
   class DIDescriptor {
   protected:
-    MDNode *DbgNode;
+    const MDNode *DbgNode;
 
     StringRef getStringField(unsigned Elt) const;
     unsigned getUnsignedField(unsigned Elt) const {
@@ -51,18 +52,19 @@ namespace llvm {
 
     template <typename DescTy>
     DescTy getFieldAs(unsigned Elt) const {
-      return DescTy(getDescriptorField(Elt).getNode());
+      return DescTy(getDescriptorField(Elt));
     }
 
     GlobalVariable *getGlobalVariableField(unsigned Elt) const;
 
   public:
     explicit DIDescriptor() : DbgNode(0) {}
-    explicit DIDescriptor(MDNode *N) : DbgNode(N) {}
+    explicit DIDescriptor(const MDNode *N) : DbgNode(N) {}
 
     bool Verify() const { return DbgNode != 0; }
 
-    MDNode *getNode() const { return DbgNode; }
+    operator MDNode *() const { return const_cast<MDNode*>(DbgNode); }
+    MDNode *operator ->() const { return const_cast<MDNode*>(DbgNode); }
 
     unsigned getVersion() const {
       return getUnsignedField(0) & LLVMDebugVersionMask;
@@ -72,10 +74,10 @@ namespace llvm {
       return getUnsignedField(0) & ~LLVMDebugVersionMask;
     }
 
-    /// ValidDebugInfo - Return true if N represents valid debug info value.
-    static bool ValidDebugInfo(MDNode *N, unsigned OptLevel);
+    /// print - print descriptor.
+    void print(raw_ostream &OS) const;
 
-    /// dump - print descriptor.
+    /// dump - print descriptor to dbgs() with a newline.
     void dump() const;
 
     bool isDerivedType() const;
@@ -98,7 +100,7 @@ namespace llvm {
   /// DISubrange - This is used to represent ranges, for array bounds.
   class DISubrange : public DIDescriptor {
   public:
-    explicit DISubrange(MDNode *N = 0) : DIDescriptor(N) {}
+    explicit DISubrange(const MDNode *N = 0) : DIDescriptor(N) {}
 
     int64_t getLo() const { return (int64_t)getUInt64Field(1); }
     int64_t getHi() const { return (int64_t)getUInt64Field(2); }
@@ -107,7 +109,7 @@ namespace llvm {
   /// DIArray - This descriptor holds an array of descriptors.
   class DIArray : public DIDescriptor {
   public:
-    explicit DIArray(MDNode *N = 0)
+    explicit DIArray(const MDNode *N = 0)
       : DIDescriptor(N) {}
 
     unsigned getNumElements() const;
@@ -119,7 +121,7 @@ namespace llvm {
   /// DIScope - A base class for various scopes.
   class DIScope : public DIDescriptor {
   public:
-    explicit DIScope(MDNode *N = 0) : DIDescriptor (N) {}
+    explicit DIScope(const MDNode *N = 0) : DIDescriptor (N) {}
     virtual ~DIScope() {}
 
     StringRef getFilename() const;
@@ -129,7 +131,7 @@ namespace llvm {
   /// DICompileUnit - A wrapper for a compile unit.
   class DICompileUnit : public DIScope {
   public:
-    explicit DICompileUnit(MDNode *N = 0) : DIScope(N) {}
+    explicit DICompileUnit(const MDNode *N = 0) : DIScope(N) {}
 
     unsigned getLanguage() const     { return getUnsignedField(2); }
     StringRef getFilename() const  { return getStringField(3);   }
@@ -153,14 +155,17 @@ namespace llvm {
     /// Verify - Verify that a compile unit is well formed.
     bool Verify() const;
 
-    /// dump - print compile unit.
+    /// print - print compile unit.
+    void print(raw_ostream &OS) const;
+
+    /// dump - print compile unit to dbgs() with a newline.
     void dump() const;
   };
 
   /// DIFile - This is a wrapper for a file.
   class DIFile : public DIScope {
   public:
-    explicit DIFile(MDNode *N = 0) : DIScope(N) {
+    explicit DIFile(const MDNode *N = 0) : DIScope(N) {
       if (DbgNode && !isFile())
         DbgNode = 0;
     }
@@ -174,7 +179,7 @@ namespace llvm {
   /// type/precision or a file/line pair for location info.
   class DIEnumerator : public DIDescriptor {
   public:
-    explicit DIEnumerator(MDNode *N = 0) : DIDescriptor(N) {}
+    explicit DIEnumerator(const MDNode *N = 0) : DIDescriptor(N) {}
 
     StringRef getName() const        { return getStringField(1); }
     uint64_t getEnumValue() const      { return getUInt64Field(2); }
@@ -199,14 +204,14 @@ namespace llvm {
   protected:
     // This ctor is used when the Tag has already been validated by a derived
     // ctor.
-    DIType(MDNode *N, bool, bool) : DIScope(N) {}
+    DIType(const MDNode *N, bool, bool) : DIScope(N) {}
 
   public:
 
     /// Verify - Verify that a type descriptor is well formed.
     bool Verify() const;
   public:
-    explicit DIType(MDNode *N);
+    explicit DIType(const MDNode *N);
     explicit DIType() {}
     virtual ~DIType() {}
 
@@ -253,18 +258,25 @@ namespace llvm {
     }
     StringRef getFilename() const    { return getCompileUnit().getFilename();}
     StringRef getDirectory() const   { return getCompileUnit().getDirectory();}
-    /// dump - print type.
+
+    /// print - print type.
+    void print(raw_ostream &OS) const;
+
+    /// dump - print type to dbgs() with a newline.
     void dump() const;
   };
 
   /// DIBasicType - A basic type, like 'int' or 'float'.
   class DIBasicType : public DIType {
   public:
-    explicit DIBasicType(MDNode *N = 0) : DIType(N) {}
+    explicit DIBasicType(const MDNode *N = 0) : DIType(N) {}
 
     unsigned getEncoding() const { return getUnsignedField(9); }
 
-    /// dump - print basic type.
+    /// print - print basic type.
+    void print(raw_ostream &OS) const;
+
+    /// dump - print basic type to dbgs() with a newline.
     void dump() const;
   };
 
@@ -272,10 +284,10 @@ namespace llvm {
   /// a typedef, a pointer or reference, etc.
   class DIDerivedType : public DIType {
   protected:
-    explicit DIDerivedType(MDNode *N, bool, bool)
+    explicit DIDerivedType(const MDNode *N, bool, bool)
       : DIType(N, true, true) {}
   public:
-    explicit DIDerivedType(MDNode *N = 0)
+    explicit DIDerivedType(const MDNode *N = 0)
       : DIType(N, true, true) {}
 
     DIType getTypeDerivedFrom() const { return getFieldAs<DIType>(9); }
@@ -283,7 +295,11 @@ namespace llvm {
     /// getOriginalTypeSize - If this type is derived from a base type then
     /// return base type size.
     uint64_t getOriginalTypeSize() const;
-    /// dump - print derived type.
+
+    /// print - print derived type.
+    void print(raw_ostream &OS) const;
+
+    /// dump - print derived type to dbgs() with a newline.
     void dump() const;
 
     /// replaceAllUsesWith - Replace all uses of debug info referenced by
@@ -297,7 +313,7 @@ namespace llvm {
   /// FIXME: Why is this a DIDerivedType??
   class DICompositeType : public DIDerivedType {
   public:
-    explicit DICompositeType(MDNode *N = 0)
+    explicit DICompositeType(const MDNode *N = 0)
       : DIDerivedType(N, true, true) {
       if (N && !isCompositeType())
         DbgNode = 0;
@@ -312,46 +328,17 @@ namespace llvm {
     /// Verify - Verify that a composite type descriptor is well formed.
     bool Verify() const;
 
-    /// dump - print composite type.
-    void dump() const;
-  };
-
-  /// DIGlobal - This is a common class for global variables and subprograms.
-  class DIGlobal : public DIDescriptor {
-  protected:
-    explicit DIGlobal(MDNode *N) : DIDescriptor(N) {}
-
-  public:
-    virtual ~DIGlobal() {}
-
-    DIScope getContext() const          { return getFieldAs<DIScope>(2); }
-    StringRef getName() const         { return getStringField(3); }
-    StringRef getDisplayName() const  { return getStringField(4); }
-    StringRef getLinkageName() const  { return getStringField(5); }
-    DICompileUnit getCompileUnit() const{ 
-      if (getVersion() == llvm::LLVMDebugVersion7)
-        return getFieldAs<DICompileUnit>(6);
-
-      DIFile F = getFieldAs<DIFile>(6); 
-      return F.getCompileUnit();
-    }
-
-    unsigned getLineNumber() const      { return getUnsignedField(7); }
-    DIType getType() const              { return getFieldAs<DIType>(8); }
+    /// print - print composite type.
+    void print(raw_ostream &OS) const;
 
-    /// isLocalToUnit - Return true if this subprogram is local to the current
-    /// compile unit, like 'static' in C.
-    unsigned isLocalToUnit() const      { return getUnsignedField(9); }
-    unsigned isDefinition() const       { return getUnsignedField(10); }
-
-    /// dump - print global.
+    /// dump - print composite type to dbgs() with a newline.
     void dump() const;
   };
 
   /// DISubprogram - This is a wrapper for a subprogram (e.g. a function).
   class DISubprogram : public DIScope {
   public:
-    explicit DISubprogram(MDNode *N = 0) : DIScope(N) {}
+    explicit DISubprogram(const MDNode *N = 0) : DIScope(N) {}
 
     DIScope getContext() const          { return getFieldAs<DIScope>(2); }
     StringRef getName() const         { return getStringField(3); }
@@ -373,7 +360,7 @@ namespace llvm {
       DICompositeType DCT(getFieldAs<DICompositeType>(8));
       if (DCT.Verify()) {
         DIArray A = DCT.getTypeArray();
-        DIType T(A.getElement(0).getNode());
+        DIType T(A.getElement(0));
         return T.getName();
       }
       DIType T(getFieldAs<DIType>(8));
@@ -413,7 +400,10 @@ namespace llvm {
     /// Verify - Verify that a subprogram descriptor is well formed.
     bool Verify() const;
 
-    /// dump - print subprogram.
+    /// print - print subprogram.
+    void print(raw_ostream &OS) const;
+
+    /// dump - print subprogram to dbgs() with a newline.
     void dump() const;
 
     /// describes - Return true if this subprogram provides debugging
@@ -422,16 +412,36 @@ namespace llvm {
   };
 
   /// DIGlobalVariable - This is a wrapper for a global variable.
-  class DIGlobalVariable : public DIGlobal {
+  class DIGlobalVariable : public DIDescriptor {
   public:
-    explicit DIGlobalVariable(MDNode *N = 0) : DIGlobal(N) {}
+    explicit DIGlobalVariable(const MDNode *N = 0) : DIDescriptor(N) {}
+
+    DIScope getContext() const          { return getFieldAs<DIScope>(2); }
+    StringRef getName() const         { return getStringField(3); }
+    StringRef getDisplayName() const  { return getStringField(4); }
+    StringRef getLinkageName() const  { return getStringField(5); }
+    DICompileUnit getCompileUnit() const{ 
+      if (getVersion() == llvm::LLVMDebugVersion7)
+        return getFieldAs<DICompileUnit>(6);
+
+      DIFile F = getFieldAs<DIFile>(6); 
+      return F.getCompileUnit();
+    }
+
+    unsigned getLineNumber() const      { return getUnsignedField(7); }
+    DIType getType() const              { return getFieldAs<DIType>(8); }
+    unsigned isLocalToUnit() const      { return getUnsignedField(9); }
+    unsigned isDefinition() const       { return getUnsignedField(10); }
 
     GlobalVariable *getGlobal() const { return getGlobalVariableField(11); }
 
     /// Verify - Verify that a global variable descriptor is well formed.
     bool Verify() const;
 
-    /// dump - print global variable.
+    /// print - print global variable.
+    void print(raw_ostream &OS) const;
+
+    /// dump - print global variable to dbgs() with a newline.
     void dump() const;
   };
 
@@ -439,7 +449,7 @@ namespace llvm {
   /// global etc).
   class DIVariable : public DIDescriptor {
   public:
-    explicit DIVariable(MDNode *N = 0)
+    explicit DIVariable(const MDNode *N = 0)
       : DIDescriptor(N) {}
 
     DIScope getContext() const          { return getFieldAs<DIScope>(1); }
@@ -479,14 +489,17 @@ namespace llvm {
     /// information for an inlined function arguments.
     bool isInlinedFnArgument(const Function *CurFn);
 
-    /// dump - print variable.
+    /// print - print variable.
+    void print(raw_ostream &OS) const;
+
+    /// dump - print variable to dbgs() with a newline.
     void dump() const;
   };
 
   /// DILexicalBlock - This is a wrapper for a lexical block.
   class DILexicalBlock : public DIScope {
   public:
-    explicit DILexicalBlock(MDNode *N = 0) : DIScope(N) {}
+    explicit DILexicalBlock(const MDNode *N = 0) : DIScope(N) {}
     DIScope getContext() const       { return getFieldAs<DIScope>(1);      }
     StringRef getDirectory() const   { return getContext().getDirectory(); }
     StringRef getFilename() const    { return getContext().getFilename();  }
@@ -497,7 +510,7 @@ namespace llvm {
   /// DINameSpace - A wrapper for a C++ style name space.
   class DINameSpace : public DIScope { 
   public:
-    explicit DINameSpace(MDNode *N = 0) : DIScope(N) {}
+    explicit DINameSpace(const MDNode *N = 0) : DIScope(N) {}
     DIScope getContext() const     { return getFieldAs<DIScope>(1);      }
     StringRef getName() const      { return getStringField(2);           }
     StringRef getDirectory() const { return getContext().getDirectory(); }
@@ -510,13 +523,14 @@ namespace llvm {
       return F.getCompileUnit();
     }
     unsigned getLineNumber() const { return getUnsignedField(4);         }
+    bool Verify() const;
   };
 
   /// DILocation - This object holds location information. This object
   /// is not associated with any DWARF tag.
   class DILocation : public DIDescriptor {
   public:
-    explicit DILocation(MDNode *N) : DIDescriptor(N) { }
+    explicit DILocation(const MDNode *N) : DIDescriptor(N) { }
 
     unsigned getLineNumber() const     { return getUnsignedField(0); }
     unsigned getColumnNumber() const   { return getUnsignedField(1); }
@@ -663,7 +677,7 @@ namespace llvm {
     DIVariable CreateVariable(unsigned Tag, DIDescriptor Context,
                               StringRef Name,
                               DIFile F, unsigned LineNo,
-                              DIType Ty);
+                              DIType Ty, bool AlwaysPreserve = false);
 
     /// CreateComplexVariable - Create a new descriptor for the specified
     /// variable which has a complex address expression for its address.
@@ -715,7 +729,7 @@ namespace llvm {
                        std::string &Dir);
 
   /// getDISubprogram - Find subprogram that is enclosing this scope.
-  DISubprogram getDISubprogram(MDNode *Scope);
+  DISubprogram getDISubprogram(const MDNode *Scope);
 
   /// getDICompositeType - Find underlying composite type.
   DICompositeType getDICompositeType(DIType T);
@@ -755,20 +769,20 @@ namespace llvm {
     bool addType(DIType DT);
 
   public:
-    typedef SmallVector<MDNode *, 8>::iterator iterator;
-    iterator compile_unit_begin()    { return CUs.begin(); }
-    iterator compile_unit_end()      { return CUs.end(); }
-    iterator subprogram_begin()      { return SPs.begin(); }
-    iterator subprogram_end()        { return SPs.end(); }
-    iterator global_variable_begin() { return GVs.begin(); }
-    iterator global_variable_end()   { return GVs.end(); }
-    iterator type_begin()            { return TYs.begin(); }
-    iterator type_end()              { return TYs.end(); }
-
-    unsigned compile_unit_count()    { return CUs.size(); }
-    unsigned global_variable_count() { return GVs.size(); }
-    unsigned subprogram_count()      { return SPs.size(); }
-    unsigned type_count()            { return TYs.size(); }
+    typedef SmallVector<MDNode *, 8>::const_iterator iterator;
+    iterator compile_unit_begin()    const { return CUs.begin(); }
+    iterator compile_unit_end()      const { return CUs.end(); }
+    iterator subprogram_begin()      const { return SPs.begin(); }
+    iterator subprogram_end()        const { return SPs.end(); }
+    iterator global_variable_begin() const { return GVs.begin(); }
+    iterator global_variable_end()   const { return GVs.end(); }
+    iterator type_begin()            const { return TYs.begin(); }
+    iterator type_end()              const { return TYs.end(); }
+
+    unsigned compile_unit_count()    const { return CUs.size(); }
+    unsigned global_variable_count() const { return GVs.size(); }
+    unsigned subprogram_count()      const { return SPs.size(); }
+    unsigned type_count()            const { return TYs.size(); }
 
   private:
     SmallVector<MDNode *, 8> CUs;  // Compile Units
diff --git a/include/llvm/Analysis/InlineCost.h b/include/llvm/Analysis/InlineCost.h
index d51dd6c..cac7cfe 100644
--- a/include/llvm/Analysis/InlineCost.h
+++ b/include/llvm/Analysis/InlineCost.h
@@ -198,6 +198,9 @@ namespace llvm {
     /// has been inlined. If Callee is NULL it means a dead call has been
     /// eliminated.
     void growCachedCostInfo(Function* Caller, Function* Callee);
+
+    /// clear - empty the cache of inline costs
+    void clear();
   };
 
   /// callIsSmall - If a call is likely to lower to a single target instruction,
diff --git a/include/llvm/Analysis/Lint.h b/include/llvm/Analysis/Lint.h
index 2f01366..eb65d22 100644
--- a/include/llvm/Analysis/Lint.h
+++ b/include/llvm/Analysis/Lint.h
@@ -38,8 +38,7 @@ FunctionPass *createLintPass();
 /// This should only be used for debugging, because it plays games with
 /// PassManagers and stuff.
 void lintModule(
-  const Module &M,  ///< The module to be checked
-  std::string *ErrorInfo = 0      ///< Information about failures.
+  const Module &M    ///< The module to be checked
 );
 
 // lintFunction - Check a function.
diff --git a/include/llvm/Analysis/Passes.h b/include/llvm/Analysis/Passes.h
index 1a5cbb2..ce3f7a6 100644
--- a/include/llvm/Analysis/Passes.h
+++ b/include/llvm/Analysis/Passes.h
@@ -153,6 +153,9 @@ namespace llvm {
 
   // print debug info intrinsics in human readable form
   FunctionPass *createDbgInfoPrinterPass();
+
+  // Print module-level debug info metadata in human-readable form.
+  ModulePass *createModuleDebugInfoPrinterPass();
 }
 
 #endif
diff --git a/include/llvm/CallingConv.h b/include/llvm/CallingConv.h
index 624390d..b0481b9 100644
--- a/include/llvm/CallingConv.h
+++ b/include/llvm/CallingConv.h
@@ -74,7 +74,12 @@ namespace CallingConv {
     ARM_AAPCS_VFP = 68,
 
     /// MSP430_INTR - Calling convention used for MSP430 interrupt routines.
-    MSP430_INTR = 69
+    MSP430_INTR = 69,
+
+    /// X86_ThisCall - Similar to X86_StdCall. Passes first argument in ECX,
+    /// others via stack. Callee is responsible for stack cleaning. MSVC uses
+    /// this by default for methods in its ABI.
+    X86_ThisCall = 70
   };
 } // End CallingConv namespace
 
diff --git a/include/llvm/CodeGen/ELFRelocation.h b/include/llvm/CodeGen/ELFRelocation.h
deleted file mode 100644
index e58b8df..0000000
--- a/include/llvm/CodeGen/ELFRelocation.h
+++ /dev/null
@@ -1,51 +0,0 @@
-//=== ELFRelocation.h - ELF Relocation Info ---------------------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the ELFRelocation class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CODEGEN_ELF_RELOCATION_H
-#define LLVM_CODEGEN_ELF_RELOCATION_H
-
-#include "llvm/System/DataTypes.h"
-
-namespace llvm {
-
-  /// ELFRelocation - This class contains all the information necessary to
-  /// to generate any 32-bit or 64-bit ELF relocation entry.
-  class ELFRelocation {
-    uint64_t r_offset;    // offset in the section of the object this applies to
-    uint32_t r_symidx;    // symbol table index of the symbol to use
-    uint32_t r_type;      // machine specific relocation type
-    int64_t  r_add;       // explicit relocation addend
-    bool     r_rela;      // if true then the addend is part of the entry
-                          // otherwise the addend is at the location specified
-                          // by r_offset
-  public:      
-  
-    uint64_t getInfo(bool is64Bit = false) const {
-      if (is64Bit)
-        return ((uint64_t)r_symidx << 32) + ((uint64_t)r_type & 0xFFFFFFFFL);
-      else
-        return (r_symidx << 8)  + (r_type & 0xFFL);
-    }
-  
-    uint64_t getOffset() const { return r_offset; }
-    uint64_t getAddress() const { return r_add; }
-  
-    ELFRelocation(uint64_t off, uint32_t sym, uint32_t type, 
-                  bool rela = true, int64_t addend = 0) : 
-      r_offset(off), r_symidx(sym), r_type(type),
-      r_add(addend), r_rela(rela) {}
-  };
-
-} // end llvm namespace
-
-#endif // LLVM_CODEGEN_ELF_RELOCATION_H
diff --git a/include/llvm/CodeGen/FastISel.h b/include/llvm/CodeGen/FastISel.h
index 5a2b0e7..005c7bc 100644
--- a/include/llvm/CodeGen/FastISel.h
+++ b/include/llvm/CodeGen/FastISel.h
@@ -60,6 +60,7 @@ protected:
   const TargetData &TD;
   const TargetInstrInfo &TII;
   const TargetLowering &TLI;
+  bool IsBottomUp;
 
 public:
   /// startNewBlock - Set the current block to which generated machine
@@ -105,7 +106,7 @@ public:
   /// getRegForGEPIndex - This is a wrapper around getRegForValue that also
   /// takes care of truncating or sign-extending the given getelementptr
   /// index value.
-  unsigned getRegForGEPIndex(const Value *V);
+  std::pair<unsigned, bool> getRegForGEPIndex(const Value *V);
 
   virtual ~FastISel();
 
@@ -141,7 +142,8 @@ protected:
   ///
   virtual unsigned FastEmit_r(MVT VT,
                               MVT RetVT,
-                              unsigned Opcode, unsigned Op0);
+                              unsigned Opcode,
+                              unsigned Op0, bool Op0IsKill);
 
   /// FastEmit_rr - This method is called by target-independent code
   /// to request that an instruction with the given type, opcode, and
@@ -150,7 +152,8 @@ protected:
   virtual unsigned FastEmit_rr(MVT VT,
                                MVT RetVT,
                                unsigned Opcode,
-                               unsigned Op0, unsigned Op1);
+                               unsigned Op0, bool Op0IsKill,
+                               unsigned Op1, bool Op1IsKill);
 
   /// FastEmit_ri - This method is called by target-independent code
   /// to request that an instruction with the given type, opcode, and
@@ -159,7 +162,8 @@ protected:
   virtual unsigned FastEmit_ri(MVT VT,
                                MVT RetVT,
                                unsigned Opcode,
-                               unsigned Op0, uint64_t Imm);
+                               unsigned Op0, bool Op0IsKill,
+                               uint64_t Imm);
 
   /// FastEmit_rf - This method is called by target-independent code
   /// to request that an instruction with the given type, opcode, and
@@ -168,7 +172,8 @@ protected:
   virtual unsigned FastEmit_rf(MVT VT,
                                MVT RetVT,
                                unsigned Opcode,
-                               unsigned Op0, const ConstantFP *FPImm);
+                               unsigned Op0, bool Op0IsKill,
+                               const ConstantFP *FPImm);
 
   /// FastEmit_rri - This method is called by target-independent code
   /// to request that an instruction with the given type, opcode, and
@@ -177,7 +182,9 @@ protected:
   virtual unsigned FastEmit_rri(MVT VT,
                                 MVT RetVT,
                                 unsigned Opcode,
-                                unsigned Op0, unsigned Op1, uint64_t Imm);
+                                unsigned Op0, bool Op0IsKill,
+                                unsigned Op1, bool Op1IsKill,
+                                uint64_t Imm);
 
   /// FastEmit_ri_ - This method is a wrapper of FastEmit_ri. It first tries
   /// to emit an instruction with an immediate operand using FastEmit_ri.
@@ -185,8 +192,8 @@ protected:
   /// FastEmit_rr instead.
   unsigned FastEmit_ri_(MVT VT,
                         unsigned Opcode,
-                        unsigned Op0, uint64_t Imm,
-                        MVT ImmType);
+                        unsigned Op0, bool Op0IsKill,
+                        uint64_t Imm, MVT ImmType);
   
   /// FastEmit_rf_ - This method is a wrapper of FastEmit_rf. It first tries
   /// to emit an instruction with an immediate operand using FastEmit_rf.
@@ -194,8 +201,8 @@ protected:
   /// FastEmit_rr instead.
   unsigned FastEmit_rf_(MVT VT,
                         unsigned Opcode,
-                        unsigned Op0, const ConstantFP *FPImm,
-                        MVT ImmType);
+                        unsigned Op0, bool Op0IsKill,
+                        const ConstantFP *FPImm, MVT ImmType);
   
   /// FastEmit_i - This method is called by target-independent code
   /// to request that an instruction with the given type, opcode, and
@@ -224,35 +231,40 @@ protected:
   ///
   unsigned FastEmitInst_r(unsigned MachineInstOpcode,
                           const TargetRegisterClass *RC,
-                          unsigned Op0);
+                          unsigned Op0, bool Op0IsKill);
 
   /// FastEmitInst_rr - Emit a MachineInstr with two register operands
   /// and a result register in the given register class.
   ///
   unsigned FastEmitInst_rr(unsigned MachineInstOpcode,
                            const TargetRegisterClass *RC,
-                           unsigned Op0, unsigned Op1);
+                           unsigned Op0, bool Op0IsKill,
+                           unsigned Op1, bool Op1IsKill);
 
   /// FastEmitInst_ri - Emit a MachineInstr with two register operands
   /// and a result register in the given register class.
   ///
   unsigned FastEmitInst_ri(unsigned MachineInstOpcode,
                            const TargetRegisterClass *RC,
-                           unsigned Op0, uint64_t Imm);
+                           unsigned Op0, bool Op0IsKill,
+                           uint64_t Imm);
 
   /// FastEmitInst_rf - Emit a MachineInstr with two register operands
   /// and a result register in the given register class.
   ///
   unsigned FastEmitInst_rf(unsigned MachineInstOpcode,
                            const TargetRegisterClass *RC,
-                           unsigned Op0, const ConstantFP *FPImm);
+                           unsigned Op0, bool Op0IsKill,
+                           const ConstantFP *FPImm);
 
   /// FastEmitInst_rri - Emit a MachineInstr with two register operands,
   /// an immediate, and a result register in the given register class.
   ///
   unsigned FastEmitInst_rri(unsigned MachineInstOpcode,
                             const TargetRegisterClass *RC,
-                            unsigned Op0, unsigned Op1, uint64_t Imm);
+                            unsigned Op0, bool Op0IsKill,
+                            unsigned Op1, bool Op1IsKill,
+                            uint64_t Imm);
   
   /// FastEmitInst_i - Emit a MachineInstr with a single immediate
   /// operand, and a result register in the given register class.
@@ -263,12 +275,13 @@ protected:
   /// FastEmitInst_extractsubreg - Emit a MachineInstr for an extract_subreg
   /// from a specified index of a superregister to a specified type.
   unsigned FastEmitInst_extractsubreg(MVT RetVT,
-                                      unsigned Op0, uint32_t Idx);
+                                      unsigned Op0, bool Op0IsKill,
+                                      uint32_t Idx);
 
   /// FastEmitZExtFromI1 - Emit MachineInstrs to compute the value of Op
   /// with all but the least significant bit set to zero.
   unsigned FastEmitZExtFromI1(MVT VT,
-                              unsigned Op);
+                              unsigned Op0, bool Op0IsKill);
 
   /// FastEmitBranch - Emit an unconditional branch to the given block,
   /// unless it is the immediate (fall-through) successor, and update
@@ -316,6 +329,9 @@ private:
   /// called when the value isn't already available in a register and must
   /// be materialized with new instructions.
   unsigned materializeRegForValue(const Value *V, MVT VT);
+
+  /// hasTrivialKill - Test whether the given value has exactly one use.
+  bool hasTrivialKill(const Value *V) const;
 };
 
 }
diff --git a/include/llvm/CodeGen/ISDOpcodes.h b/include/llvm/CodeGen/ISDOpcodes.h
index ba554d3..a5e9dd5 100644
--- a/include/llvm/CodeGen/ISDOpcodes.h
+++ b/include/llvm/CodeGen/ISDOpcodes.h
@@ -95,6 +95,18 @@ namespace ISD {
     // execution to HANDLER. Many platform-related details also :)
     EH_RETURN,
 
+    // OUTCHAIN = EH_SJLJ_SETJMP(INCHAIN, buffer)
+    // This corresponds to the eh.sjlj.setjmp intrinsic.
+    // It takes an input chain and a pointer to the jump buffer as inputs
+    // and returns an outchain.
+    EH_SJLJ_SETJMP,
+
+    // OUTCHAIN = EH_SJLJ_LONGJMP(INCHAIN, buffer)
+    // This corresponds to the eh.sjlj.longjmp intrinsic.
+    // It takes an input chain and a pointer to the jump buffer as inputs
+    // and returns an outchain.
+    EH_SJLJ_LONGJMP,
+
     // TargetConstant* - Like Constant*, but the DAG does not do any folding,
     // simplification, or lowering of the constant. They are used for constants
     // which are known to fit in the immediate fields of their users, or for
diff --git a/include/llvm/CodeGen/LatencyPriorityQueue.h b/include/llvm/CodeGen/LatencyPriorityQueue.h
index 7ac0418..13cebea 100644
--- a/include/llvm/CodeGen/LatencyPriorityQueue.h
+++ b/include/llvm/CodeGen/LatencyPriorityQueue.h
@@ -17,7 +17,6 @@
 #define LATENCY_PRIORITY_QUEUE_H
 
 #include "llvm/CodeGen/ScheduleDAG.h"
-#include "llvm/ADT/PriorityQueue.h"
 
 namespace llvm {
   class LatencyPriorityQueue;
@@ -41,10 +40,11 @@ namespace llvm {
     std::vector<unsigned> NumNodesSolelyBlocking;
     
     /// Queue - The queue.
-    PriorityQueue<SUnit*, std::vector<SUnit*>, latency_sort> Queue;
+    std::vector<SUnit*> Queue;
+    latency_sort Picker;
 
-public:
-  LatencyPriorityQueue() : Queue(latency_sort(this)) {
+  public:
+    LatencyPriorityQueue() : Picker(this) {
     }
 
     void initNodes(std::vector<SUnit> &sunits) {
@@ -73,31 +73,13 @@ public:
       return NumNodesSolelyBlocking[NodeNum];
     }
     
-    unsigned size() const { return Queue.size(); }
-
     bool empty() const { return Queue.empty(); }
     
-    virtual void push(SUnit *U) {
-      push_impl(U);
-    }
-    void push_impl(SUnit *U);
-    
-    void push_all(const std::vector<SUnit *> &Nodes) {
-      for (unsigned i = 0, e = Nodes.size(); i != e; ++i)
-        push_impl(Nodes[i]);
-    }
+    virtual void push(SUnit *U);
     
-    SUnit *pop() {
-      if (empty()) return NULL;
-      SUnit *V = Queue.top();
-      Queue.pop();
-      return V;
-    }
+    virtual SUnit *pop();
 
-    void remove(SUnit *SU) {
-      assert(!Queue.empty() && "Not in queue!");
-      Queue.erase_one(SU);
-    }
+    virtual void remove(SUnit *SU);
 
     // ScheduledNode - As nodes are scheduled, we look to see if there are any
     // successor nodes that have a single unscheduled predecessor.  If so, that
diff --git a/include/llvm/CodeGen/LinkAllCodegenComponents.h b/include/llvm/CodeGen/LinkAllCodegenComponents.h
index 0064776..b4c2f2f 100644
--- a/include/llvm/CodeGen/LinkAllCodegenComponents.h
+++ b/include/llvm/CodeGen/LinkAllCodegenComponents.h
@@ -46,6 +46,7 @@ namespace {
       (void) llvm::createBURRListDAGScheduler(NULL, llvm::CodeGenOpt::Default);
       (void) llvm::createTDRRListDAGScheduler(NULL, llvm::CodeGenOpt::Default);
       (void) llvm::createSourceListDAGScheduler(NULL,llvm::CodeGenOpt::Default);
+      (void) llvm::createHybridListDAGScheduler(NULL,llvm::CodeGenOpt::Default);
       (void) llvm::createTDListDAGScheduler(NULL, llvm::CodeGenOpt::Default);
       (void) llvm::createFastDAGScheduler(NULL, llvm::CodeGenOpt::Default);
       (void) llvm::createDefaultScheduler(NULL, llvm::CodeGenOpt::Default);
diff --git a/include/llvm/CodeGen/LiveInterval.h b/include/llvm/CodeGen/LiveInterval.h
index 70e79ce..637f52b 100644
--- a/include/llvm/CodeGen/LiveInterval.h
+++ b/include/llvm/CodeGen/LiveInterval.h
@@ -551,10 +551,6 @@ namespace llvm {
     /// Also remove the value# from value# list.
     void removeValNo(VNInfo *ValNo);
 
-    /// scaleNumbering - Renumber VNI and ranges to provide gaps for new
-    /// instructions.
-    void scaleNumbering(unsigned factor);
-
     /// getSize - Returns the sum of sizes of all the LiveRange's.
     ///
     unsigned getSize() const;
diff --git a/include/llvm/CodeGen/LiveIntervalAnalysis.h b/include/llvm/CodeGen/LiveIntervalAnalysis.h
index 351217c..32fa709 100644
--- a/include/llvm/CodeGen/LiveIntervalAnalysis.h
+++ b/include/llvm/CodeGen/LiveIntervalAnalysis.h
@@ -303,6 +303,12 @@ namespace llvm {
                            SlotIndex MIIdx,
                            MachineOperand& MO, unsigned MOIdx);
 
+    /// isPartialRedef - Return true if the specified def at the specific index
+    /// is partially re-defining the specified live interval. A common case of
+    /// this is a definition of the sub-register. 
+    bool isPartialRedef(SlotIndex MIIdx, MachineOperand &MO,
+                        LiveInterval &interval);
+
     /// handleVirtualRegisterDef - update intervals for a virtual
     /// register def
     void handleVirtualRegisterDef(MachineBasicBlock *MBB,
diff --git a/include/llvm/CodeGen/MachineFrameInfo.h b/include/llvm/CodeGen/MachineFrameInfo.h
index c7c5300..fe2c298 100644
--- a/include/llvm/CodeGen/MachineFrameInfo.h
+++ b/include/llvm/CodeGen/MachineFrameInfo.h
@@ -125,6 +125,10 @@ class MachineFrameInfo {
   /// to builtin \@llvm.frameaddress.
   bool FrameAddressTaken;
 
+  /// ReturnAddressTaken - This boolean keeps track of whether there is a call
+  /// to builtin \@llvm.returnaddress.
+  bool ReturnAddressTaken;
+
   /// StackSize - The prolog/epilog code inserter calculates the final stack
   /// offsets for all of the fixed size objects, updating the Objects list
   /// above.  It then updates StackSize to contain the number of bytes that need
@@ -152,8 +156,12 @@ class MachineFrameInfo {
   ///
   unsigned MaxAlignment;
 
-  /// HasCalls - Set to true if this function has any function calls.  This is
-  /// only valid during and after prolog/epilog code insertion.
+  /// AdjustsStack - Set to true if this function adjusts the stack -- e.g.,
+  /// when calling another function. This is only valid during and after
+  /// prolog/epilog code insertion.
+  bool AdjustsStack;
+
+  /// HasCalls - Set to true if this function has any function calls.
   bool HasCalls;
 
   /// StackProtectorIdx - The frame index for the stack protector.
@@ -189,6 +197,8 @@ public:
     StackSize = NumFixedObjects = OffsetAdjustment = MaxAlignment = 0;
     HasVarSizedObjects = false;
     FrameAddressTaken = false;
+    ReturnAddressTaken = false;
+    AdjustsStack = false;
     HasCalls = false;
     StackProtectorIdx = -1;
     MaxCallFrameSize = 0;
@@ -218,6 +228,12 @@ public:
   bool isFrameAddressTaken() const { return FrameAddressTaken; }
   void setFrameAddressIsTaken(bool T) { FrameAddressTaken = T; }
 
+  /// isReturnAddressTaken - This method may be called any time after instruction
+  /// selection is complete to determine if there is a call to
+  /// \@llvm.returnaddress in this function.
+  bool isReturnAddressTaken() const { return ReturnAddressTaken; }
+  void setReturnAddressIsTaken(bool s) { ReturnAddressTaken = s; }
+
   /// getObjectIndexBegin - Return the minimum frame object index.
   ///
   int getObjectIndexBegin() const { return -NumFixedObjects; }
@@ -313,9 +329,13 @@ public:
   ///
   void setMaxAlignment(unsigned Align) { MaxAlignment = Align; }
 
-  /// hasCalls - Return true if the current function has no function calls.
-  /// This is only valid during or after prolog/epilog code emission.
-  ///
+  /// AdjustsStack - Return true if this function adjusts the stack -- e.g.,
+  /// when calling another function. This is only valid during and after
+  /// prolog/epilog code insertion.
+  bool adjustsStack() const { return AdjustsStack; }
+  void setAdjustsStack(bool V) { AdjustsStack = V; }
+
+  /// hasCalls - Return true if the current function has any function calls.
   bool hasCalls() const { return HasCalls; }
   void setHasCalls(bool V) { HasCalls = V; }
 
diff --git a/include/llvm/CodeGen/MachineFunction.h b/include/llvm/CodeGen/MachineFunction.h
index 595872a..409d13ee 100644
--- a/include/llvm/CodeGen/MachineFunction.h
+++ b/include/llvm/CodeGen/MachineFunction.h
@@ -114,9 +114,14 @@ class MachineFunction {
   ///
   unsigned FunctionNumber;
   
-  /// The alignment of the function.
+  /// Alignment - The alignment of the function.
   unsigned Alignment;
 
+  /// CallsSetJmp - True if the function calls setjmp or sigsetjmp. This is used
+  /// to limit optimizations which cannot reason about the control flow of
+  /// setjmp.
+  bool CallsSetJmp;
+
   MachineFunction(const MachineFunction &); // DO NOT IMPLEMENT
   void operator=(const MachineFunction&);   // DO NOT IMPLEMENT
 public:
@@ -181,6 +186,17 @@ public:
   void EnsureAlignment(unsigned A) {
     if (Alignment < A) Alignment = A;
   }
+
+  /// callsSetJmp - Returns true if the function calls setjmp or sigsetjmp.
+  bool callsSetJmp() const {
+    return CallsSetJmp;
+  }
+
+  /// setCallsSetJmp - Set a flag that indicates if there's a call to setjmp or
+  /// sigsetjmp.
+  void setCallsSetJmp(bool B) {
+    CallsSetJmp = B;
+  }
   
   /// getInfo - Keep track of various per-function pieces of information for
   /// backends that would like to do so.
diff --git a/include/llvm/CodeGen/MachineInstr.h b/include/llvm/CodeGen/MachineInstr.h
index c4adca1..cf691bb 100644
--- a/include/llvm/CodeGen/MachineInstr.h
+++ b/include/llvm/CodeGen/MachineInstr.h
@@ -28,6 +28,7 @@
 
 namespace llvm {
 
+template <typename T> class SmallVectorImpl;
 class AliasAnalysis;
 class TargetInstrDesc;
 class TargetInstrInfo;
@@ -223,14 +224,33 @@ public:
   bool isSubregToReg() const {
     return getOpcode() == TargetOpcode::SUBREG_TO_REG;
   }
+  bool isRegSequence() const {
+    return getOpcode() == TargetOpcode::REG_SEQUENCE;
+  }
   
   /// readsRegister - Return true if the MachineInstr reads the specified
   /// register. If TargetRegisterInfo is passed, then it also checks if there
   /// is a read of a super-register.
+  /// This does not count partial redefines of virtual registers as reads:
+  ///   %reg1024:6 = OP.
   bool readsRegister(unsigned Reg, const TargetRegisterInfo *TRI = NULL) const {
     return findRegisterUseOperandIdx(Reg, false, TRI) != -1;
   }
 
+  /// readsVirtualRegister - Return true if the MachineInstr reads the specified
+  /// virtual register. Take into account that a partial define is a
+  /// read-modify-write operation.
+  bool readsVirtualRegister(unsigned Reg) const {
+    return readsWritesVirtualRegister(Reg).first;
+  }
+
+  /// readsWritesVirtualRegister - Return a pair of bools (reads, writes)
+  /// indicating if this instruction reads or writes Reg. This also considers
+  /// partial defines.
+  /// If Ops is not null, all operand indices for Reg are added.
+  std::pair<bool,bool> readsWritesVirtualRegister(unsigned Reg,
+                                      SmallVectorImpl<unsigned> *Ops = 0) const;
+
   /// killsRegister - Return true if the MachineInstr kills the specified
   /// register. If TargetRegisterInfo is passed, then it also checks if there is
   /// a kill of a super-register.
@@ -238,12 +258,19 @@ public:
     return findRegisterUseOperandIdx(Reg, true, TRI) != -1;
   }
 
-  /// modifiesRegister - Return true if the MachineInstr modifies the
+  /// definesRegister - Return true if the MachineInstr fully defines the
   /// specified register. If TargetRegisterInfo is passed, then it also checks
   /// if there is a def of a super-register.
-  bool modifiesRegister(unsigned Reg,
-                        const TargetRegisterInfo *TRI = NULL) const {
-    return findRegisterDefOperandIdx(Reg, false, TRI) != -1;
+  /// NOTE: It's ignoring subreg indices on virtual registers.
+  bool definesRegister(unsigned Reg, const TargetRegisterInfo *TRI=NULL) const {
+    return findRegisterDefOperandIdx(Reg, false, false, TRI) != -1;
+  }
+
+  /// modifiesRegister - Return true if the MachineInstr modifies (fully define
+  /// or partially define) the specified register.
+  /// NOTE: It's ignoring subreg indices on virtual registers.
+  bool modifiesRegister(unsigned Reg, const TargetRegisterInfo *TRI) const {
+    return findRegisterDefOperandIdx(Reg, false, true, TRI) != -1;
   }
 
   /// registerDefIsDead - Returns true if the register is dead in this machine
@@ -251,7 +278,7 @@ public:
   /// if there is a dead def of a super-register.
   bool registerDefIsDead(unsigned Reg,
                          const TargetRegisterInfo *TRI = NULL) const {
-    return findRegisterDefOperandIdx(Reg, true, TRI) != -1;
+    return findRegisterDefOperandIdx(Reg, true, false, TRI) != -1;
   }
 
   /// findRegisterUseOperandIdx() - Returns the operand index that is a use of
@@ -270,16 +297,18 @@ public:
   
   /// findRegisterDefOperandIdx() - Returns the operand index that is a def of
   /// the specified register or -1 if it is not found. If isDead is true, defs
-  /// that are not dead are skipped. If TargetRegisterInfo is non-null, then it
-  /// also checks if there is a def of a super-register.
-  int findRegisterDefOperandIdx(unsigned Reg, bool isDead = false,
+  /// that are not dead are skipped. If Overlap is true, then it also looks for
+  /// defs that merely overlap the specified register. If TargetRegisterInfo is
+  /// non-null, then it also checks if there is a def of a super-register.
+  int findRegisterDefOperandIdx(unsigned Reg,
+                                bool isDead = false, bool Overlap = false,
                                 const TargetRegisterInfo *TRI = NULL) const;
 
   /// findRegisterDefOperand - Wrapper for findRegisterDefOperandIdx, it returns
   /// a pointer to the MachineOperand rather than an index.
   MachineOperand *findRegisterDefOperand(unsigned Reg, bool isDead = false,
                                          const TargetRegisterInfo *TRI = NULL) {
-    int Idx = findRegisterDefOperandIdx(Reg, isDead, TRI);
+    int Idx = findRegisterDefOperandIdx(Reg, isDead, false, TRI);
     return (Idx == -1) ? NULL : &getOperand(Idx);
   }
 
@@ -299,6 +328,10 @@ public:
   /// reference if DefOpIdx is not null.
   bool isRegTiedToDefOperand(unsigned UseOpIdx, unsigned *DefOpIdx = 0) const;
 
+  /// clearKillInfo - Clears kill flags on all operands.
+  ///
+  void clearKillInfo();
+
   /// copyKillDeadInfo - Copies kill / dead operand properties from MI.
   ///
   void copyKillDeadInfo(const MachineInstr *MI);
@@ -324,7 +357,7 @@ public:
   /// addRegisterDefined - We have determined MI defines a register. Make sure
   /// there is an operand defining Reg.
   void addRegisterDefined(unsigned IncomingReg,
-                          const TargetRegisterInfo *RegInfo);
+                          const TargetRegisterInfo *RegInfo = 0);
 
   /// isSafeToMove - Return true if it is safe to move this instruction. If
   /// SawStore is set to true, it means that there is a store (or call) between
diff --git a/include/llvm/CodeGen/MachineRegisterInfo.h b/include/llvm/CodeGen/MachineRegisterInfo.h
index b377dec..fa14fdc 100644
--- a/include/llvm/CodeGen/MachineRegisterInfo.h
+++ b/include/llvm/CodeGen/MachineRegisterInfo.h
@@ -93,6 +93,20 @@ public:
   /// specified register (it may be live-in).
   bool reg_empty(unsigned RegNo) const { return reg_begin(RegNo) == reg_end(); }
 
+  /// reg_nodbg_iterator/reg_nodbg_begin/reg_nodbg_end - Walk all defs and uses
+  /// of the specified register, skipping those marked as Debug.
+  typedef defusechain_iterator<true,true,true> reg_nodbg_iterator;
+  reg_nodbg_iterator reg_nodbg_begin(unsigned RegNo) const {
+    return reg_nodbg_iterator(getRegUseDefListHead(RegNo));
+  }
+  static reg_nodbg_iterator reg_nodbg_end() { return reg_nodbg_iterator(0); }
+
+  /// reg_nodbg_empty - Return true if the only instructions using or defining
+  /// Reg are Debug instructions.
+  bool reg_nodbg_empty(unsigned RegNo) const {
+    return reg_nodbg_begin(RegNo) == reg_nodbg_end();
+  }
+
   /// def_iterator/def_begin/def_end - Walk all defs of the specified register.
   typedef defusechain_iterator<false,true,false> def_iterator;
   def_iterator def_begin(unsigned RegNo) const {
@@ -162,6 +176,12 @@ public:
   /// register or null if none is found.  This assumes that the code is in SSA
   /// form, so there should only be one definition.
   MachineInstr *getVRegDef(unsigned Reg) const;
+
+  /// clearKillFlags - Iterate over all the uses of the given register and
+  /// clear the kill flag from the MachineOperand. This function is used by
+  /// optimization passes which extend register lifetimes and need only
+  /// preserve conservative kill flag information.
+  void clearKillFlags(unsigned Reg) const;
   
 #ifndef NDEBUG
   void dumpUses(unsigned RegNo) const;
@@ -196,7 +216,8 @@ public:
 
   /// getRegClassVirtRegs - Return the list of virtual registers of the given
   /// target register class.
-  std::vector<unsigned> &getRegClassVirtRegs(const TargetRegisterClass *RC) {
+  const std::vector<unsigned> &
+  getRegClassVirtRegs(const TargetRegisterClass *RC) const {
     return RegClass2VRegMap[RC->getID()];
   }
 
@@ -229,11 +250,18 @@ public:
   /// setPhysRegUsed - Mark the specified register used in this function.
   /// This should only be called during and after register allocation.
   void setPhysRegUsed(unsigned Reg) { UsedPhysRegs[Reg] = true; }
-  
+
+  /// addPhysRegsUsed - Mark the specified registers used in this function.
+  /// This should only be called during and after register allocation.
+  void addPhysRegsUsed(const BitVector &Regs) { UsedPhysRegs |= Regs; }
+
   /// setPhysRegUnused - Mark the specified register unused in this function.
   /// This should only be called during and after register allocation.
   void setPhysRegUnused(unsigned Reg) { UsedPhysRegs[Reg] = false; }
-  
+
+  /// closePhysRegsUsed - Expand UsedPhysRegs to its transitive closure over
+  /// subregisters. That means that if R is used, so are all subregisters.
+  void closePhysRegsUsed(const TargetRegisterInfo&);
 
   //===--------------------------------------------------------------------===//
   // LiveIn/LiveOut Management
@@ -265,6 +293,10 @@ public:
   /// corresponding live-in physical register.
   unsigned getLiveInPhysReg(unsigned VReg) const;
 
+  /// getLiveInVirtReg - If PReg is a live-in physical register, return the
+  /// corresponding live-in physical register.
+  unsigned getLiveInVirtReg(unsigned PReg) const;
+
   /// EmitLiveInCopies - Emit copies to initialize livein virtual registers
   /// into the given entry block.
   void EmitLiveInCopies(MachineBasicBlock *EntryMBB,
diff --git a/include/llvm/CodeGen/MachineSSAUpdater.h b/include/llvm/CodeGen/MachineSSAUpdater.h
index 979ef01..cbb45a7 100644
--- a/include/llvm/CodeGen/MachineSSAUpdater.h
+++ b/include/llvm/CodeGen/MachineSSAUpdater.h
@@ -23,6 +23,7 @@ namespace llvm {
   class TargetInstrInfo;
   class TargetRegisterClass;
   template<typename T> class SmallVectorImpl;
+  template<typename T> class SSAUpdaterTraits;
   class BumpPtrAllocator;
 
 /// MachineSSAUpdater - This class updates SSA form for a set of virtual
@@ -30,9 +31,7 @@ namespace llvm {
 /// or another unstructured transformation wants to rewrite a set of uses of one
 /// vreg with uses of a set of vregs.
 class MachineSSAUpdater {
-public:
-  class BBInfo;
-  typedef SmallVectorImpl<BBInfo*> BlockListTy;
+  friend class SSAUpdaterTraits<MachineSSAUpdater>;
 
 private:
   /// AvailableVals - This keeps track of which value to use on a per-block
@@ -40,11 +39,6 @@ private:
   //typedef DenseMap<MachineBasicBlock*, unsigned > AvailableValsTy;
   void *AV;
 
-  /// BBMap - The GetValueAtEndOfBlock method maintains this mapping from
-  /// basic blocks to BBInfo structures.
-  /// typedef DenseMap<MachineBasicBlock*, BBInfo*> BBMapTy;
-  void *BM;
-
   /// VR - Current virtual register whose uses are being updated.
   unsigned VR;
 
@@ -111,14 +105,6 @@ public:
 private:
   void ReplaceRegWith(unsigned OldReg, unsigned NewReg);
   unsigned GetValueAtEndOfBlockInternal(MachineBasicBlock *BB);
-  void BuildBlockList(MachineBasicBlock *BB, BlockListTy *BlockList,
-                      BumpPtrAllocator *Allocator);
-  void FindDominators(BlockListTy *BlockList);
-  void FindPHIPlacement(BlockListTy *BlockList);
-  void FindAvailableVals(BlockListTy *BlockList);
-  void FindExistingPHI(MachineBasicBlock *BB, BlockListTy *BlockList);
-  bool CheckIfPHIMatches(MachineInstr *PHI);
-  void RecordMatchingPHI(MachineInstr *PHI);
 
   void operator=(const MachineSSAUpdater&); // DO NOT IMPLEMENT
   MachineSSAUpdater(const MachineSSAUpdater&);     // DO NOT IMPLEMENT
diff --git a/include/llvm/CodeGen/ScheduleDAG.h b/include/llvm/CodeGen/ScheduleDAG.h
index 7c025e3..076268b 100644
--- a/include/llvm/CodeGen/ScheduleDAG.h
+++ b/include/llvm/CodeGen/ScheduleDAG.h
@@ -16,6 +16,7 @@
 #define LLVM_CODEGEN_SCHEDULEDAG_H
 
 #include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/Target/TargetMachine.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/GraphTraits.h"
@@ -34,7 +35,6 @@ namespace llvm {
   class SDNode;
   class TargetInstrInfo;
   class TargetInstrDesc;
-  class TargetLowering;
   class TargetMachine;
   class TargetRegisterClass;
   template<class Graph> class GraphWriter;
@@ -226,7 +226,6 @@ namespace llvm {
   private:
     SDNode *Node;                       // Representative node.
     MachineInstr *Instr;                // Alternatively, a MachineInstr.
-    MachineInstr *DbgInstr;             // A dbg_value referencing this.
   public:
     SUnit *OrigNode;                    // If not this, the node from which
                                         // this node was cloned.
@@ -240,7 +239,7 @@ namespace llvm {
     typedef SmallVector<SDep, 4>::iterator succ_iterator;
     typedef SmallVector<SDep, 4>::const_iterator const_pred_iterator;
     typedef SmallVector<SDep, 4>::const_iterator const_succ_iterator;
-    
+
     unsigned NodeNum;                   // Entry # of node in the node vector.
     unsigned NodeQueueId;               // Queue id of node.
     unsigned short Latency;             // Node latency.
@@ -257,6 +256,9 @@ namespace llvm {
     bool isScheduled      : 1;          // True once scheduled.
     bool isScheduleHigh   : 1;          // True if preferable to schedule high.
     bool isCloned         : 1;          // True if this node has been cloned.
+    Sched::Preference SchedulingPref;   // Scheduling preference.
+
+    SmallVector<MachineInstr*, 4> DbgInstrList; // dbg_values referencing this.
   private:
     bool isDepthCurrent   : 1;          // True if Depth is current.
     bool isHeightCurrent  : 1;          // True if Height is current.
@@ -269,35 +271,38 @@ namespace llvm {
     /// SUnit - Construct an SUnit for pre-regalloc scheduling to represent
     /// an SDNode and any nodes flagged to it.
     SUnit(SDNode *node, unsigned nodenum)
-      : Node(node), Instr(0), DbgInstr(0), OrigNode(0), NodeNum(nodenum),
+      : Node(node), Instr(0), OrigNode(0), NodeNum(nodenum),
         NodeQueueId(0),  Latency(0), NumPreds(0), NumSuccs(0), NumPredsLeft(0),
         NumSuccsLeft(0), isTwoAddress(false), isCommutable(false),
         hasPhysRegDefs(false), hasPhysRegClobbers(false),
         isPending(false), isAvailable(false), isScheduled(false),
         isScheduleHigh(false), isCloned(false),
+        SchedulingPref(Sched::None),
         isDepthCurrent(false), isHeightCurrent(false), Depth(0), Height(0),
         CopyDstRC(NULL), CopySrcRC(NULL) {}
 
     /// SUnit - Construct an SUnit for post-regalloc scheduling to represent
     /// a MachineInstr.
     SUnit(MachineInstr *instr, unsigned nodenum)
-      : Node(0), Instr(instr), DbgInstr(0), OrigNode(0), NodeNum(nodenum),
+      : Node(0), Instr(instr), OrigNode(0), NodeNum(nodenum),
         NodeQueueId(0), Latency(0), NumPreds(0), NumSuccs(0), NumPredsLeft(0),
         NumSuccsLeft(0), isTwoAddress(false), isCommutable(false),
         hasPhysRegDefs(false), hasPhysRegClobbers(false),
         isPending(false), isAvailable(false), isScheduled(false),
         isScheduleHigh(false), isCloned(false),
+        SchedulingPref(Sched::None),
         isDepthCurrent(false), isHeightCurrent(false), Depth(0), Height(0),
         CopyDstRC(NULL), CopySrcRC(NULL) {}
 
     /// SUnit - Construct a placeholder SUnit.
     SUnit()
-      : Node(0), Instr(0), DbgInstr(0), OrigNode(0), NodeNum(~0u),
+      : Node(0), Instr(0), OrigNode(0), NodeNum(~0u),
         NodeQueueId(0), Latency(0), NumPreds(0), NumSuccs(0), NumPredsLeft(0),
         NumSuccsLeft(0), isTwoAddress(false), isCommutable(false),
         hasPhysRegDefs(false), hasPhysRegClobbers(false),
         isPending(false), isAvailable(false), isScheduled(false),
         isScheduleHigh(false), isCloned(false),
+        SchedulingPref(Sched::None),
         isDepthCurrent(false), isHeightCurrent(false), Depth(0), Height(0),
         CopyDstRC(NULL), CopySrcRC(NULL) {}
 
@@ -329,20 +334,6 @@ namespace llvm {
       return Instr;
     }
 
-    /// setDbgInstr - Assign the debug instruction for the SUnit.
-    /// This may be used during post-regalloc scheduling.
-    void setDbgInstr(MachineInstr *MI) {
-      assert(!Node && "Setting debug MachineInstr of SUnit with SDNode!");
-      DbgInstr = MI;
-    }
-
-    /// getDbgInstr - Return the debug MachineInstr for this SUnit.
-    /// This may be used during post-regalloc scheduling.
-    MachineInstr *getDbgInstr() const {
-      assert(!Node && "Reading debug MachineInstr of SUnit with SDNode!");
-      return DbgInstr;
-    }
-
     /// addPred - This adds the specified edge as a pred of the current node if
     /// not already.  It also adds the current node as a successor of the
     /// specified node.
@@ -404,7 +395,7 @@ namespace llvm {
           return true;
       return false;
     }
-    
+
     void dump(const ScheduleDAG *G) const;
     void dumpAll(const ScheduleDAG *G) const;
     void print(raw_ostream &O, const ScheduleDAG *G) const;
@@ -423,7 +414,9 @@ namespace llvm {
   /// implementation to decide.
   /// 
   class SchedulingPriorityQueue {
+    unsigned CurCycle;
   public:
+    SchedulingPriorityQueue() : CurCycle(0) {}
     virtual ~SchedulingPriorityQueue() {}
   
     virtual void initNodes(std::vector<SUnit> &SUnits) = 0;
@@ -431,11 +424,15 @@ namespace llvm {
     virtual void updateNode(const SUnit *SU) = 0;
     virtual void releaseState() = 0;
 
-    virtual unsigned size() const = 0;
     virtual bool empty() const = 0;
     virtual void push(SUnit *U) = 0;
   
-    virtual void push_all(const std::vector<SUnit *> &Nodes) = 0;
+    void push_all(const std::vector<SUnit *> &Nodes) {
+      for (std::vector<SUnit *>::const_iterator I = Nodes.begin(),
+           E = Nodes.end(); I != E; ++I)
+        push(*I);
+    }
+
     virtual SUnit *pop() = 0;
 
     virtual void remove(SUnit *SU) = 0;
@@ -447,6 +444,14 @@ namespace llvm {
     virtual void ScheduledNode(SUnit *) {}
 
     virtual void UnscheduledNode(SUnit *) {}
+
+    void setCurCycle(unsigned Cycle) {
+      CurCycle = Cycle;
+    }
+
+    unsigned getCurCycle() const {
+      return CurCycle;
+    }    
   };
 
   class ScheduleDAG {
@@ -456,7 +461,6 @@ namespace llvm {
     const TargetMachine &TM;              // Target processor
     const TargetInstrInfo *TII;           // Target instruction information
     const TargetRegisterInfo *TRI;        // Target processor register info
-    const TargetLowering *TLI;            // Target lowering info
     MachineFunction &MF;                  // Machine function
     MachineRegisterInfo &MRI;             // Virtual/real register map
     std::vector<SUnit*> Sequence;         // The schedule. Null SUnit*'s
diff --git a/include/llvm/CodeGen/SchedulerRegistry.h b/include/llvm/CodeGen/SchedulerRegistry.h
index cf3274f..14c33e2 100644
--- a/include/llvm/CodeGen/SchedulerRegistry.h
+++ b/include/llvm/CodeGen/SchedulerRegistry.h
@@ -73,11 +73,17 @@ ScheduleDAGSDNodes *createBURRListDAGScheduler(SelectionDAGISel *IS,
 ScheduleDAGSDNodes *createTDRRListDAGScheduler(SelectionDAGISel *IS,
                                                CodeGenOpt::Level OptLevel);
 
-/// createBURRListDAGScheduler - This creates a bottom up register usage
-/// reduction list scheduler that schedules in source code order when possible.
+/// createBURRListDAGScheduler - This creates a bottom up list scheduler that
+/// schedules nodes in source code order when possible.
 ScheduleDAGSDNodes *createSourceListDAGScheduler(SelectionDAGISel *IS,
                                                  CodeGenOpt::Level OptLevel);
 
+/// createHybridListDAGScheduler - This creates a bottom up hybrid register
+/// usage reduction list scheduler that make use of latency information to
+/// avoid stalls for long latency instructions.
+ScheduleDAGSDNodes *createHybridListDAGScheduler(SelectionDAGISel *IS,
+                                                 CodeGenOpt::Level);
+
 /// createTDListDAGScheduler - This creates a top-down list scheduler with
 /// a hazard recognizer.
 ScheduleDAGSDNodes *createTDListDAGScheduler(SelectionDAGISel *IS,
diff --git a/include/llvm/CodeGen/SelectionDAG.h b/include/llvm/CodeGen/SelectionDAG.h
index ae15230..97202bd 100644
--- a/include/llvm/CodeGen/SelectionDAG.h
+++ b/include/llvm/CodeGen/SelectionDAG.h
@@ -36,6 +36,7 @@ class MDNode;
 class SDNodeOrdering;
 class SDDbgValue;
 class TargetLowering;
+class TargetSelectionDAGInfo;
 
 template<> struct ilist_traits<SDNode> : public ilist_default_traits<SDNode> {
 private:
@@ -131,6 +132,7 @@ void checkForCycles(const SelectionDAG *DAG);
 class SelectionDAG {
   const TargetMachine &TM;
   const TargetLowering &TLI;
+  const TargetSelectionDAGInfo &TSI;
   MachineFunction *MF;
   FunctionLoweringInfo &FLI;
   LLVMContext *Context;
@@ -201,6 +203,7 @@ public:
   MachineFunction &getMachineFunction() const { return *MF; }
   const TargetMachine &getTarget() const { return TM; }
   const TargetLowering &getTargetLoweringInfo() const { return TLI; }
+  const TargetSelectionDAGInfo &getSelectionDAGInfo() const { return TSI; }
   FunctionLoweringInfo &getFunctionLoweringInfo() const { return FLI; }
   LLVMContext *getContext() const {return Context; }
 
@@ -334,6 +337,8 @@ public:
   SDValue getTargetConstant(const ConstantInt &Val, EVT VT) {
     return getConstant(Val, VT, true);
   }
+  // The forms below that take a double should only be used for simple
+  // constants that can be exactly represented in VT.  No checks are made.
   SDValue getConstantFP(double Val, EVT VT, bool isTarget = false);
   SDValue getConstantFP(const APFloat& Val, EVT VT, bool isTarget = false);
   SDValue getConstantFP(const ConstantFP &CF, EVT VT, bool isTarget = false);
diff --git a/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h b/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
index 90905f5..3aaab88 100644
--- a/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
+++ b/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
@@ -84,6 +84,23 @@ public:
 
 
 class TargetLoweringObjectFileMachO : public TargetLoweringObjectFile {
+  /// TLSDataSection - Section for thread local data.
+  ///
+  const MCSection *TLSDataSection;        // Defaults to ".tdata".
+
+  /// TLSBSSSection - Section for thread local uninitialized data.
+  ///
+  const MCSection *TLSBSSSection;         // Defaults to ".tbss".
+  
+  /// TLSTLVSection - Section for thread local structure infomation.
+  /// Contains the source code name of the variable, visibility and a pointer
+  /// to the initial value (.tdata or .tbss).
+  const MCSection *TLSTLVSection;         // Defaults to ".tlv".
+  
+  /// TLSThreadInitSection - Section for thread local data initialization
+  /// functions.
+  const MCSection *TLSThreadInitSection;  // Defaults to ".thread_init_func".
+  
   const MCSection *CStringSection;
   const MCSection *UStringSection;
   const MCSection *TextCoalSection;
@@ -161,13 +178,15 @@ public:
 
 
 class TargetLoweringObjectFileCOFF : public TargetLoweringObjectFile {
-  mutable void *UniquingMap;
+  const MCSection *DrectveSection;
 public:
-  TargetLoweringObjectFileCOFF() : UniquingMap(0) {}
-  ~TargetLoweringObjectFileCOFF();
+  TargetLoweringObjectFileCOFF() {}
+  ~TargetLoweringObjectFileCOFF() {}
 
   virtual void Initialize(MCContext &Ctx, const TargetMachine &TM);
 
+  virtual const MCSection *getDrectveSection() const { return DrectveSection; }
+
   virtual const MCSection *
   getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
                            Mangler *Mang, const TargetMachine &TM) const;
@@ -175,11 +194,6 @@ public:
   virtual const MCSection *
   SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
                          Mangler *Mang, const TargetMachine &TM) const;
-
-  /// getCOFFSection - Return the MCSection for the specified COFF section.
-  /// FIXME: Switch this to a semantic view eventually.
-  const MCSection *getCOFFSection(StringRef Name, bool isDirective,
-                                  SectionKind K) const;
 };
 
 } // end namespace llvm
diff --git a/include/llvm/CodeGen/ValueTypes.h b/include/llvm/CodeGen/ValueTypes.h
index a7aafc0..6e2a102 100644
--- a/include/llvm/CodeGen/ValueTypes.h
+++ b/include/llvm/CodeGen/ValueTypes.h
@@ -63,21 +63,22 @@ namespace llvm {
       v1i64          =  24,   //  1 x i64
       v2i64          =  25,   //  2 x i64
       v4i64          =  26,   //  4 x i64
+      v8i64          =  27,   //  8 x i64
 
-      v2f32          =  27,   //  2 x f32
-      v4f32          =  28,   //  4 x f32
-      v8f32          =  29,   //  8 x f32
-      v2f64          =  30,   //  2 x f64
-      v4f64          =  31,   //  4 x f64
+      v2f32          =  28,   //  2 x f32
+      v4f32          =  29,   //  4 x f32
+      v8f32          =  30,   //  8 x f32
+      v2f64          =  31,   //  2 x f64
+      v4f64          =  32,   //  4 x f64
 
       FIRST_VECTOR_VALUETYPE = v2i8,
       LAST_VECTOR_VALUETYPE  = v4f64,
 
-      Flag           =  32,   // This glues nodes together during pre-RA sched
+      Flag           =  33,   // This glues nodes together during pre-RA sched
 
-      isVoid         =  33,   // This has no value
+      isVoid         =  34,   // This has no value
 
-      LAST_VALUETYPE =  34,   // This always remains at the end of the list.
+      LAST_VALUETYPE =  35,   // This always remains at the end of the list.
 
       // This is the current maximum for LAST_VALUETYPE.
       // EVT::MAX_ALLOWED_VALUETYPE is used for asserts and to size bit vectors
@@ -140,7 +141,7 @@ namespace llvm {
     bool isInteger() const {
       return ((SimpleTy >= MVT::FIRST_INTEGER_VALUETYPE &&
                SimpleTy <= MVT::LAST_INTEGER_VALUETYPE) ||
-               (SimpleTy >= MVT::v2i8 && SimpleTy <= MVT::v4i64));
+               (SimpleTy >= MVT::v2i8 && SimpleTy <= MVT::v8i64));
     }
 
     /// isVector - Return true if this is a vector value type.
@@ -192,7 +193,8 @@ namespace llvm {
       case v8i32: return i32;
       case v1i64:
       case v2i64:
-      case v4i64: return i64;
+      case v4i64:
+      case v8i64: return i64;
       case v2f32:
       case v4f32:
       case v8f32: return f32;
@@ -211,6 +213,7 @@ namespace llvm {
       case v8i8 :
       case v8i16:
       case v8i32:
+      case v8i64:
       case v8f32: return 8;
       case v4i8:
       case v4i16:
@@ -269,6 +272,7 @@ namespace llvm {
       case v4i64:
       case v8f32:
       case v4f64: return 256;
+      case v8i64: return 512;
       }
     }
     
@@ -332,6 +336,7 @@ namespace llvm {
         if (NumElements == 1)  return MVT::v1i64;
         if (NumElements == 2)  return MVT::v2i64;
         if (NumElements == 4)  return MVT::v4i64;
+        if (NumElements == 8)  return MVT::v8i64;
         break;
       case MVT::f32:
         if (NumElements == 2)  return MVT::v2f32;
@@ -468,10 +473,15 @@ namespace llvm {
 
     /// is256BitVector - Return true if this is a 256-bit vector type.
     inline bool is256BitVector() const {
-      return isSimple() ?
-             (V==MVT::v8f32 || V==MVT::v4f64 || V==MVT::v32i8 ||
-              V==MVT::v16i16 || V==MVT::v8i32 || V==MVT::v4i64) : 
-            isExtended256BitVector();
+      return isSimple()
+        ? (V==MVT::v8f32 || V==MVT::v4f64 || V==MVT::v32i8 ||
+           V==MVT::v16i16 || V==MVT::v8i32 || V==MVT::v4i64)
+        : isExtended256BitVector();
+    }
+
+    /// is512BitVector - Return true if this is a 512-bit vector type.
+    inline bool is512BitVector() const {
+      return isSimple() ? (V == MVT::v8i64) : isExtended512BitVector();
     }
 
     /// isOverloaded - Return true if this is an overloaded type for TableGen.
@@ -668,6 +678,7 @@ namespace llvm {
     bool isExtended64BitVector() const;
     bool isExtended128BitVector() const;
     bool isExtended256BitVector() const;
+    bool isExtended512BitVector() const;
     EVT getExtendedVectorElementType() const;
     unsigned getExtendedVectorNumElements() const;
     unsigned getExtendedSizeInBits() const;
diff --git a/include/llvm/CodeGen/ValueTypes.td b/include/llvm/CodeGen/ValueTypes.td
index c8bb789..8151c0b 100644
--- a/include/llvm/CodeGen/ValueTypes.td
+++ b/include/llvm/CodeGen/ValueTypes.td
@@ -47,15 +47,16 @@ def v8i32  : ValueType<256, 23>;   //  8 x i32 vector value
 def v1i64  : ValueType<64 , 24>;   //  1 x i64 vector value
 def v2i64  : ValueType<128, 25>;   //  2 x i64 vector value
 def v4i64  : ValueType<256, 26>;   //  4 x f64 vector value
+def v8i64  : ValueType<512, 27>;   //  4 x f64 vector value
 
-def v2f32  : ValueType<64,  27>;   //  2 x f32 vector value
-def v4f32  : ValueType<128, 28>;   //  4 x f32 vector value
-def v8f32  : ValueType<256, 29>;   //  8 x f32 vector value
-def v2f64  : ValueType<128, 30>;   //  2 x f64 vector value
-def v4f64  : ValueType<256, 31>;   //  4 x f64 vector value
+def v2f32  : ValueType<64,  28>;   //  2 x f32 vector value
+def v4f32  : ValueType<128, 29>;   //  4 x f32 vector value
+def v8f32  : ValueType<256, 30>;   //  8 x f32 vector value
+def v2f64  : ValueType<128, 31>;   //  2 x f64 vector value
+def v4f64  : ValueType<256, 32>;   //  4 x f64 vector value
 
-def FlagVT : ValueType<0  , 32>;   // Pre-RA sched glue
-def isVoid : ValueType<0  , 33>;   // Produces no value
+def FlagVT : ValueType<0  , 33>;   // Pre-RA sched glue
+def isVoid : ValueType<0  , 34>;   // Produces no value
 
 def MetadataVT: ValueType<0, 250>; // Metadata
 
diff --git a/include/llvm/Config/config.h.cmake b/include/llvm/Config/config.h.cmake
index adbcc11..e7594ba 100644
--- a/include/llvm/Config/config.h.cmake
+++ b/include/llvm/Config/config.h.cmake
@@ -312,6 +312,9 @@
 /* Define to 1 if you have the `roundf' function. */
 #undef HAVE_ROUNDF
 
+/* Define to 1 if you have the `round' function. */
+#cmakedefine HAVE_ROUND ${HAVE_ROUND}
+
 /* Define to 1 if you have the `sbrk' function. */
 #cmakedefine HAVE_SBRK ${HAVE_SBRK}
 
diff --git a/include/llvm/Config/config.h.in b/include/llvm/Config/config.h.in
index c2ab23e..99d2ab5 100644
--- a/include/llvm/Config/config.h.in
+++ b/include/llvm/Config/config.h.in
@@ -24,6 +24,9 @@
 /* Define if threads enabled */
 #undef ENABLE_THREADS
 
+/* Define if timestamp information (e.g., __DATE___) is allowed */
+#undef ENABLE_TIMESTAMPS
+
 /* Define to 1 if you have the `argz_append' function. */
 #undef HAVE_ARGZ_APPEND
 
diff --git a/include/llvm/Constants.h b/include/llvm/Constants.h
index 1cebb20..9ca845e 100644
--- a/include/llvm/Constants.h
+++ b/include/llvm/Constants.h
@@ -691,9 +691,6 @@ public:
   // ConstantExpr class, because they will attempt to fold the constant
   // expression into something simpler if possible.
 
-  /// Cast constant expr
-  ///
-
   /// getAlignOf constant expr - computes the alignment of a type in a target
   /// independent way (Note: the return type is an i64).
   static Constant *getAlignOf(const Type* Ty);
@@ -926,7 +923,11 @@ DEFINE_TRANSPARENT_CASTED_OPERAND_ACCESSORS(ConstantExpr, Constant)
 /// UndefValue - 'undef' values are things that do not have specified contents.
 /// These are used for a variety of purposes, including global variable
 /// initializers and operands to instructions.  'undef' values can occur with
-/// any type.
+/// any first-class type.
+///
+/// Undef values aren't exactly constants; if they have multiple uses, they
+/// can appear to have different bit patterns at each use. See
+/// LangRef.html#undefvalues for details.
 ///
 class UndefValue : public Constant {
   friend struct ConstantCreator<UndefValue, Type, char>;
@@ -957,6 +958,7 @@ public:
     return V->getValueID() == UndefValueVal;
   }
 };
+
 } // End llvm namespace
 
 #endif
diff --git a/include/llvm/Intrinsics.td b/include/llvm/Intrinsics.td
index 8bbfd77..2b4df54 100644
--- a/include/llvm/Intrinsics.td
+++ b/include/llvm/Intrinsics.td
@@ -306,11 +306,13 @@ def int_eh_unwind_init: Intrinsic<[]>,
 def int_eh_dwarf_cfa  : Intrinsic<[llvm_ptr_ty], [llvm_i32_ty]>;
 
 let Properties = [IntrNoMem] in {
-  def int_eh_sjlj_setjmp  : Intrinsic<[llvm_i32_ty],  [llvm_ptr_ty]>;
-  def int_eh_sjlj_longjmp : Intrinsic<[], [llvm_ptr_ty]>;
   def int_eh_sjlj_lsda    : Intrinsic<[llvm_ptr_ty]>;
   def int_eh_sjlj_callsite: Intrinsic<[], [llvm_i32_ty]>;
 }
+def int_eh_sjlj_setjmp  : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty]>,
+                                     GCCBuiltin<"__builtin_setjmp">;
+def int_eh_sjlj_longjmp : Intrinsic<[], [llvm_ptr_ty]>,
+                                     GCCBuiltin<"__builtin_longjmp">;
 
 //===---------------- Generic Variable Attribute Intrinsics----------------===//
 //
diff --git a/include/llvm/IntrinsicsX86.td b/include/llvm/IntrinsicsX86.td
index 3ca9cb4..cea4856 100644
--- a/include/llvm/IntrinsicsX86.td
+++ b/include/llvm/IntrinsicsX86.td
@@ -786,9 +786,9 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
   def int_x86_aesni_aesdeclast : GCCBuiltin<"__builtin_ia32_aesdeclast128">,
               Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
                         [IntrNoMem]>;
-  def int_x86_aesni_aeskeygenassist : 
+  def int_x86_aesni_aeskeygenassist :
               GCCBuiltin<"__builtin_ia32_aeskeygenassist128">,
-              Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty],
+              Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i8_ty],
                         [IntrNoMem]>;
 }
 
diff --git a/include/llvm/LinkAllPasses.h b/include/llvm/LinkAllPasses.h
index 1e2c37a..c2375ea 100644
--- a/include/llvm/LinkAllPasses.h
+++ b/include/llvm/LinkAllPasses.h
@@ -132,12 +132,14 @@ namespace {
       (void) llvm::createPrintModulePass(0);
       (void) llvm::createPrintFunctionPass("", 0);
       (void) llvm::createDbgInfoPrinterPass();
+      (void) llvm::createModuleDebugInfoPrinterPass();
       (void) llvm::createPartialInliningPass();
       (void) llvm::createSSIPass();
       (void) llvm::createSSIEverythingPass();
       (void) llvm::createGEPSplitterPass();
       (void) llvm::createABCDPass();
       (void) llvm::createLintPass();
+      (void) llvm::createSinkingPass();
 
       (void)new llvm::IntervalPartition();
       (void)new llvm::FindUsedTypes();
diff --git a/include/llvm/MC/MCAsmInfo.h b/include/llvm/MC/MCAsmInfo.h
index f57f642..8516de0 100644
--- a/include/llvm/MC/MCAsmInfo.h
+++ b/include/llvm/MC/MCAsmInfo.h
@@ -41,6 +41,10 @@ namespace llvm {
     /// the macho-specific .zerofill directive for emitting BSS Symbols.
     bool HasMachoZeroFillDirective;               // Default is false.
     
+    /// HasMachoTBSSDirective - True if this is a MachO target that supports
+    /// the macho-specific .tbss directive for emitting thread local BSS Symbols
+    bool HasMachoTBSSDirective;                 // Default is false.
+    
     /// HasStaticCtorDtorReferenceInStaticMode - True if the compiler should
     /// emit a ".reference .constructors_used" or ".reference .destructors_used"
     /// directive after the a static ctor/dtor list.  This directive is only
@@ -303,6 +307,7 @@ namespace llvm {
     // Accessors.
     //
     bool hasMachoZeroFillDirective() const { return HasMachoZeroFillDirective; }
+    bool hasMachoTBSSDirective() const { return HasMachoTBSSDirective; }
     bool hasStaticCtorDtorReferenceInStaticMode() const {
       return HasStaticCtorDtorReferenceInStaticMode;
     }
diff --git a/include/llvm/MC/MCAsmLayout.h b/include/llvm/MC/MCAsmLayout.h
index ebf0520..b9565ba 100644
--- a/include/llvm/MC/MCAsmLayout.h
+++ b/include/llvm/MC/MCAsmLayout.h
@@ -10,6 +10,8 @@
 #ifndef LLVM_MC_MCASMLAYOUT_H
 #define LLVM_MC_MCASMLAYOUT_H
 
+#include "llvm/ADT/SmallVector.h"
+
 namespace llvm {
 class MCAssembler;
 class MCFragment;
@@ -24,11 +26,30 @@ class MCSymbolData;
 /// efficiently compute the exact addresses of any symbol in the assembly file,
 /// even during the relaxation process.
 class MCAsmLayout {
+public:
+  typedef llvm::SmallVectorImpl<MCSectionData*>::const_iterator const_iterator;
+  typedef llvm::SmallVectorImpl<MCSectionData*>::iterator iterator;
+
 private:
   MCAssembler &Assembler;
 
+  /// List of sections in layout order.
+  llvm::SmallVector<MCSectionData*, 16> SectionOrder;
+
+  /// The last fragment which was layed out, or 0 if nothing has been layed
+  /// out. Fragments are always layed out in order, so all fragments with a
+  /// lower ordinal will be up to date.
+  mutable MCFragment *LastValidFragment;
+
+  /// \brief Make sure that the layout for the given fragment is valid, lazily
+  /// computing it if necessary.
+  void EnsureValid(const MCFragment *F) const;
+
+  bool isSectionUpToDate(const MCSectionData *SD) const;
+  bool isFragmentUpToDate(const MCFragment *F) const;
+
 public:
-  MCAsmLayout(MCAssembler &_Assembler) : Assembler(_Assembler) {}
+  MCAsmLayout(MCAssembler &_Assembler);
 
   /// Get the assembler object this is a layout for.
   MCAssembler &getAssembler() const { return Assembler; }
@@ -38,6 +59,33 @@ public:
   /// the delta from the old size.
   void UpdateForSlide(MCFragment *F, int SlideAmount);
 
+  /// \brief Update the layout because a fragment has been replaced.
+  void FragmentReplaced(MCFragment *Src, MCFragment *Dst);
+
+  /// \brief Perform a full layout.
+  void LayoutFile();
+
+  /// \brief Perform layout for a single fragment, assuming that the previous
+  /// fragment has already been layed out correctly, and the parent section has
+  /// been initialized.
+  void LayoutFragment(MCFragment *Fragment);
+
+  /// \brief Performs initial layout for a single section, assuming that the
+  /// previous section (including its fragments) has already been layed out
+  /// correctly.
+  void LayoutSection(MCSectionData *SD);
+
+  /// @name Section Access (in layout order)
+  /// @{
+
+  llvm::SmallVectorImpl<MCSectionData*> &getSectionOrder() {
+    return SectionOrder;
+  }
+  const llvm::SmallVectorImpl<MCSectionData*> &getSectionOrder() const {
+    return SectionOrder;
+  }
+
+  /// @}
   /// @name Fragment Layout Data
   /// @{
 
@@ -45,15 +93,9 @@ public:
   /// current layout.
   uint64_t getFragmentEffectiveSize(const MCFragment *F) const;
 
-  /// \brief Set the effective size of the given fragment.
-  void setFragmentEffectiveSize(MCFragment *F, uint64_t Value);
-
   /// \brief Get the offset of the given fragment inside its containing section.
   uint64_t getFragmentOffset(const MCFragment *F) const;
 
-  /// \brief Set the offset of the given fragment inside its containing section.
-  void setFragmentOffset(MCFragment *F, uint64_t Value);
-
   /// @}
   /// @name Section Layout Data
   /// @{
@@ -61,22 +103,6 @@ public:
   /// \brief Get the computed address of the given section.
   uint64_t getSectionAddress(const MCSectionData *SD) const;
 
-  /// \brief Set the computed address of the given section.
-  void setSectionAddress(MCSectionData *SD, uint64_t Value);
-
-  /// \brief Get the data size of the given section, as emitted to the object
-  /// file. This may include additional padding, or be 0 for virtual sections.
-  uint64_t getSectionFileSize(const MCSectionData *SD) const;
-
-  /// \brief Set the data size of the given section.
-  void setSectionFileSize(MCSectionData *SD, uint64_t Value);
-
-  /// \brief Get the actual data size of the given section.
-  uint64_t getSectionSize(const MCSectionData *SD) const;
-
-  /// \brief Set the actual data size of the given section.
-  void setSectionSize(MCSectionData *SD, uint64_t Value);
-
   /// @}
   /// @name Utility Functions
   /// @{
@@ -85,6 +111,18 @@ public:
   /// layout.
   uint64_t getFragmentAddress(const MCFragment *F) const;
 
+  /// \brief Get the address space size of the given section, as it effects
+  /// layout. This may differ from the size reported by \see getSectionSize() by
+  /// not including section tail padding.
+  uint64_t getSectionAddressSize(const MCSectionData *SD) const;
+
+  /// \brief Get the data size of the given section, as emitted to the object
+  /// file. This may include additional padding, or be 0 for virtual sections.
+  uint64_t getSectionFileSize(const MCSectionData *SD) const;
+
+  /// \brief Get the logical data size of the given section.
+  uint64_t getSectionSize(const MCSectionData *SD) const;
+
   /// \brief Get the address of the given symbol, as computed in the current
   /// layout.
   uint64_t getSymbolAddress(const MCSymbolData *SD) const;
diff --git a/include/llvm/MC/MCAssembler.h b/include/llvm/MC/MCAssembler.h
index c1b60f0..d9963ec 100644
--- a/include/llvm/MC/MCAssembler.h
+++ b/include/llvm/MC/MCAssembler.h
@@ -32,30 +32,10 @@ class MCObjectWriter;
 class MCSection;
 class MCSectionData;
 class MCSymbol;
+class MCSymbolData;
 class MCValue;
 class TargetAsmBackend;
 
-/// MCAsmFixup - Represent a fixed size region of bytes inside some fragment
-/// which needs to be rewritten. This region will either be rewritten by the
-/// assembler or cause a relocation entry to be generated.
-//
-// FIXME: This should probably just be merged with MCFixup.
-class MCAsmFixup {
-public:
-  /// Offset - The offset inside the fragment which needs to be rewritten.
-  uint64_t Offset;
-
-  /// Value - The expression to eventually write into the fragment.
-  const MCExpr *Value;
-
-  /// Kind - The fixup kind.
-  MCFixupKind Kind;
-
-public:
-  MCAsmFixup(uint64_t _Offset, const MCExpr &_Value, MCFixupKind _Kind)
-    : Offset(_Offset), Value(&_Value), Kind(_Kind) {}
-};
-
 class MCFragment : public ilist_node<MCFragment> {
   friend class MCAsmLayout;
 
@@ -68,8 +48,7 @@ public:
     FT_Data,
     FT_Fill,
     FT_Inst,
-    FT_Org,
-    FT_ZeroFill
+    FT_Org
   };
 
 private:
@@ -78,6 +57,11 @@ private:
   /// Parent - The data for the section this fragment is in.
   MCSectionData *Parent;
 
+  /// Atom - The atom this fragment is in, as represented by it's defining
+  /// symbol. Atom's are only used by backends which set
+  /// \see MCAsmBackend::hasReliableSymbolDifference().
+  MCSymbolData *Atom;
+
   /// @name Assembler Backend Data
   /// @{
   //
@@ -91,9 +75,9 @@ private:
   /// initialized.
   uint64_t EffectiveSize;
 
-  /// Ordinal - The global index of this fragment. This is the index across all
-  /// sections, not just the parent section.
-  unsigned Ordinal;
+  /// LayoutOrder - The global layout order of this fragment. This is the index
+  /// across all fragments in the file, not just within the section.
+  unsigned LayoutOrder;
 
   /// @}
 
@@ -103,30 +87,32 @@ protected:
 public:
   // Only for sentinel.
   MCFragment();
-  virtual ~MCFragment();
 
   FragmentType getKind() const { return Kind; }
 
   MCSectionData *getParent() const { return Parent; }
   void setParent(MCSectionData *Value) { Parent = Value; }
 
-  unsigned getOrdinal() const { return Ordinal; }
-  void setOrdinal(unsigned Value) { Ordinal = Value; }
+  MCSymbolData *getAtom() const { return Atom; }
+  void setAtom(MCSymbolData *Value) { Atom = Value; }
+
+  unsigned getLayoutOrder() const { return LayoutOrder; }
+  void setLayoutOrder(unsigned Value) { LayoutOrder = Value; }
 
   static bool classof(const MCFragment *O) { return true; }
 
-  virtual void dump();
+  void dump();
 };
 
 class MCDataFragment : public MCFragment {
   SmallString<32> Contents;
 
   /// Fixups - The list of fixups in this fragment.
-  std::vector<MCAsmFixup> Fixups;
+  std::vector<MCFixup> Fixups;
 
 public:
-  typedef std::vector<MCAsmFixup>::const_iterator const_fixup_iterator;
-  typedef std::vector<MCAsmFixup>::iterator fixup_iterator;
+  typedef std::vector<MCFixup>::const_iterator const_fixup_iterator;
+  typedef std::vector<MCFixup>::iterator fixup_iterator;
 
 public:
   MCDataFragment(MCSectionData *SD = 0) : MCFragment(FT_Data, SD) {}
@@ -141,15 +127,15 @@ public:
   /// @name Fixup Access
   /// @{
 
-  void addFixup(MCAsmFixup Fixup) {
+  void addFixup(MCFixup Fixup) {
     // Enforce invariant that fixups are in offset order.
-    assert((Fixups.empty() || Fixup.Offset > Fixups.back().Offset) &&
+    assert((Fixups.empty() || Fixup.getOffset() > Fixups.back().getOffset()) &&
            "Fixups must be added in order!");
     Fixups.push_back(Fixup);
   }
 
-  std::vector<MCAsmFixup> &getFixups() { return Fixups; }
-  const std::vector<MCAsmFixup> &getFixups() const { return Fixups; }
+  std::vector<MCFixup> &getFixups() { return Fixups; }
+  const std::vector<MCFixup> &getFixups() const { return Fixups; }
 
   fixup_iterator fixup_begin() { return Fixups.begin(); }
   const_fixup_iterator fixup_begin() const { return Fixups.begin(); }
@@ -165,8 +151,6 @@ public:
     return F->getKind() == MCFragment::FT_Data;
   }
   static bool classof(const MCDataFragment *) { return true; }
-
-  virtual void dump();
 };
 
 // FIXME: This current incarnation of MCInstFragment doesn't make much sense, as
@@ -182,11 +166,11 @@ class MCInstFragment : public MCFragment {
   SmallString<8> Code;
 
   /// Fixups - The list of fixups in this fragment.
-  SmallVector<MCAsmFixup, 1> Fixups;
+  SmallVector<MCFixup, 1> Fixups;
 
 public:
-  typedef SmallVectorImpl<MCAsmFixup>::const_iterator const_fixup_iterator;
-  typedef SmallVectorImpl<MCAsmFixup>::iterator fixup_iterator;
+  typedef SmallVectorImpl<MCFixup>::const_iterator const_fixup_iterator;
+  typedef SmallVectorImpl<MCFixup>::iterator fixup_iterator;
 
 public:
   MCInstFragment(MCInst _Inst, MCSectionData *SD = 0)
@@ -210,8 +194,8 @@ public:
   /// @name Fixup Access
   /// @{
 
-  SmallVectorImpl<MCAsmFixup> &getFixups() { return Fixups; }
-  const SmallVectorImpl<MCAsmFixup> &getFixups() const { return Fixups; }
+  SmallVectorImpl<MCFixup> &getFixups() { return Fixups; }
+  const SmallVectorImpl<MCFixup> &getFixups() const { return Fixups; }
 
   fixup_iterator fixup_begin() { return Fixups.begin(); }
   const_fixup_iterator fixup_begin() const { return Fixups.begin(); }
@@ -227,8 +211,6 @@ public:
     return F->getKind() == MCFragment::FT_Inst;
   }
   static bool classof(const MCInstFragment *) { return true; }
-
-  virtual void dump();
 };
 
 class MCAlignFragment : public MCFragment {
@@ -245,17 +227,24 @@ class MCAlignFragment : public MCFragment {
   /// cannot be satisfied in this width then this fragment is ignored.
   unsigned MaxBytesToEmit;
 
-  /// EmitNops - true when aligning code and optimal nops to be used for
-  /// filling.
-  bool EmitNops;
+  /// EmitNops - Flag to indicate that (optimal) NOPs should be emitted instead
+  /// of using the provided value. The exact interpretation of this flag is
+  /// target dependent.
+  bool EmitNops : 1;
+
+  /// OnlyAlignAddress - Flag to indicate that this align is only used to adjust
+  /// the address space size of a section and that it should not be included as
+  /// part of the section size. This flag can only be used on the last fragment
+  /// in a section.
+  bool OnlyAlignAddress : 1;
 
 public:
   MCAlignFragment(unsigned _Alignment, int64_t _Value, unsigned _ValueSize,
-                  unsigned _MaxBytesToEmit, bool _EmitNops,
-		  MCSectionData *SD = 0)
+                  unsigned _MaxBytesToEmit, MCSectionData *SD = 0)
     : MCFragment(FT_Align, SD), Alignment(_Alignment),
       Value(_Value),ValueSize(_ValueSize),
-      MaxBytesToEmit(_MaxBytesToEmit), EmitNops(_EmitNops) {}
+      MaxBytesToEmit(_MaxBytesToEmit), EmitNops(false),
+      OnlyAlignAddress(false) {}
 
   /// @name Accessors
   /// @{
@@ -268,7 +257,11 @@ public:
 
   unsigned getMaxBytesToEmit() const { return MaxBytesToEmit; }
 
-  unsigned getEmitNops() const { return EmitNops; }
+  bool hasEmitNops() const { return EmitNops; }
+  void setEmitNops(bool Value) { EmitNops = Value; }
+
+  bool hasOnlyAlignAddress() const { return OnlyAlignAddress; }
+  void setOnlyAlignAddress(bool Value) { OnlyAlignAddress = Value; }
 
   /// @}
 
@@ -276,25 +269,27 @@ public:
     return F->getKind() == MCFragment::FT_Align;
   }
   static bool classof(const MCAlignFragment *) { return true; }
-
-  virtual void dump();
 };
 
 class MCFillFragment : public MCFragment {
   /// Value - Value to use for filling bytes.
   int64_t Value;
 
-  /// ValueSize - The size (in bytes) of \arg Value to use when filling.
+  /// ValueSize - The size (in bytes) of \arg Value to use when filling, or 0 if
+  /// this is a virtual fill fragment.
   unsigned ValueSize;
 
-  /// Count - The number of copies of \arg Value to insert.
-  uint64_t Count;
+  /// Size - The number of bytes to insert.
+  uint64_t Size;
 
 public:
-  MCFillFragment(int64_t _Value, unsigned _ValueSize, uint64_t _Count,
+  MCFillFragment(int64_t _Value, unsigned _ValueSize, uint64_t _Size,
                  MCSectionData *SD = 0)
     : MCFragment(FT_Fill, SD),
-      Value(_Value), ValueSize(_ValueSize), Count(_Count) {}
+      Value(_Value), ValueSize(_ValueSize), Size(_Size) {
+    assert((!ValueSize || (Size % ValueSize) == 0) &&
+           "Fill size must be a multiple of the value size!");
+  }
 
   /// @name Accessors
   /// @{
@@ -303,7 +298,7 @@ public:
 
   unsigned getValueSize() const { return ValueSize; }
 
-  uint64_t getCount() const { return Count; }
+  uint64_t getSize() const { return Size; }
 
   /// @}
 
@@ -311,8 +306,6 @@ public:
     return F->getKind() == MCFragment::FT_Fill;
   }
   static bool classof(const MCFillFragment *) { return true; }
-
-  virtual void dump();
 };
 
 class MCOrgFragment : public MCFragment {
@@ -340,39 +333,6 @@ public:
     return F->getKind() == MCFragment::FT_Org;
   }
   static bool classof(const MCOrgFragment *) { return true; }
-
-  virtual void dump();
-};
-
-/// MCZeroFillFragment - Represent data which has a fixed size and alignment,
-/// but requires no physical space in the object file.
-class MCZeroFillFragment : public MCFragment {
-  /// Size - The size of this fragment.
-  uint64_t Size;
-
-  /// Alignment - The alignment for this fragment.
-  unsigned Alignment;
-
-public:
-  MCZeroFillFragment(uint64_t _Size, unsigned _Alignment, MCSectionData *SD = 0)
-    : MCFragment(FT_ZeroFill, SD),
-      Size(_Size), Alignment(_Alignment) {}
-
-  /// @name Accessors
-  /// @{
-
-  uint64_t getSize() const { return Size; }
-
-  unsigned getAlignment() const { return Alignment; }
-
-  /// @}
-
-  static bool classof(const MCFragment *F) {
-    return F->getKind() == MCFragment::FT_ZeroFill;
-  }
-  static bool classof(const MCZeroFillFragment *) { return true; }
-
-  virtual void dump();
 };
 
 // FIXME: Should this be a separate class, or just merged into MCSection? Since
@@ -400,6 +360,9 @@ private:
   /// Ordinal - The section index in the assemblers section list.
   unsigned Ordinal;
 
+  /// LayoutOrder - The index of this section in the layout order.
+  unsigned LayoutOrder;
+
   /// Alignment - The maximum alignment seen in this section.
   unsigned Alignment;
 
@@ -412,13 +375,6 @@ private:
   /// initialized.
   uint64_t Address;
 
-  /// Size - The content size of this section. This is ~0 until initialized.
-  uint64_t Size;
-
-  /// FileSize - The size of this section in the object file. This is ~0 until
-  /// initialized.
-  uint64_t FileSize;
-
   /// HasInstructions - Whether this section has had instructions emitted into
   /// it.
   unsigned HasInstructions : 1;
@@ -441,6 +397,9 @@ public:
   unsigned getOrdinal() const { return Ordinal; }
   void setOrdinal(unsigned Value) { Ordinal = Value; }
 
+  unsigned getLayoutOrder() const { return LayoutOrder; }
+  void setLayoutOrder(unsigned Value) { LayoutOrder = Value; }
+
   /// @name Fragment Access
   /// @{
 
@@ -562,6 +521,11 @@ public:
   /// setFlags - Set the (implementation defined) symbol flags.
   void setFlags(uint32_t Value) { Flags = Value; }
 
+  /// modifyFlags - Modify the flags via a mask
+  void modifyFlags(uint32_t Value, uint32_t Mask) {
+    Flags = (Flags & ~Mask) | Value;
+  }
+
   /// getIndex - Get the (implementation defined) index.
   uint64_t getIndex() const { return Index; }
 
@@ -642,26 +606,23 @@ private:
   /// \arg Value result is fixed, otherwise the value may change due to
   /// relocation.
   bool EvaluateFixup(const MCAsmLayout &Layout,
-                     const MCAsmFixup &Fixup, const MCFragment *DF,
+                     const MCFixup &Fixup, const MCFragment *DF,
                      MCValue &Target, uint64_t &Value) const;
 
   /// Check whether a fixup can be satisfied, or whether it needs to be relaxed
   /// (increased in size, in order to hold its value correctly).
-  bool FixupNeedsRelaxation(const MCAsmFixup &Fixup, const MCFragment *DF,
+  bool FixupNeedsRelaxation(const MCFixup &Fixup, const MCFragment *DF,
                             const MCAsmLayout &Layout) const;
 
   /// Check whether the given fragment needs relaxation.
   bool FragmentNeedsRelaxation(const MCInstFragment *IF,
                                const MCAsmLayout &Layout) const;
 
-  /// LayoutSection - Assign the section the given \arg StartAddress, and then
-  /// assign offsets and sizes to the fragments in the section \arg SD, and
-  /// update the section size.
-  ///
-  /// \return The address at the end of the section, for use in laying out the
-  /// succeeding section.
-  uint64_t LayoutSection(MCSectionData &SD, MCAsmLayout &Layout,
-                         uint64_t StartAddress);
+  /// Compute the effective fragment size assuming it is layed out at the given
+  /// \arg SectionAddress and \arg FragmentOffset.
+  uint64_t ComputeFragmentSize(MCAsmLayout &Layout, const MCFragment &F,
+                               uint64_t SectionAddress,
+                               uint64_t FragmentOffset) const;
 
   /// LayoutOnce - Perform one layout iteration and return true if any offsets
   /// were adjusted.
@@ -671,18 +632,8 @@ private:
   void FinishLayout(MCAsmLayout &Layout);
 
 public:
-  /// Find the symbol which defines the atom containing given address, inside
-  /// the given section, or null if there is no such symbol.
-  //
-  // FIXME-PERF: Eliminate this, it is very slow.
-  const MCSymbolData *getAtomForAddress(const MCAsmLayout &Layout,
-                                        const MCSectionData *Section,
-                                        uint64_t Address) const;
-
   /// Find the symbol which defines the atom containing the given symbol, or
   /// null if there is no such symbol.
-  //
-  // FIXME-PERF: Eliminate this, it is very slow.
   const MCSymbolData *getAtom(const MCAsmLayout &Layout,
                               const MCSymbolData *Symbol) const;
 
diff --git a/include/llvm/MC/MCContext.h b/include/llvm/MC/MCContext.h
index 4434341..03b5fb0 100644
--- a/include/llvm/MC/MCContext.h
+++ b/include/llvm/MC/MCContext.h
@@ -20,6 +20,7 @@ namespace llvm {
   class MCExpr;
   class MCSection;
   class MCSymbol;
+  class MCLabel;
   class StringRef;
   class Twine;
   class MCSectionMachO;
@@ -43,6 +44,15 @@ namespace llvm {
     /// NextUniqueID - The next ID to dole out to an unnamed assembler temporary
     /// symbol.
     unsigned NextUniqueID;
+
+    /// Instances of directional local labels.
+    DenseMap<unsigned, MCLabel *> Instances;
+    /// NextInstance() creates the next instance of the directional local label
+    /// for the LocalLabelVal and adds it to the map if needed.
+    unsigned NextInstance(int64_t LocalLabelVal);
+    /// GetInstance() gets the current instance of the directional local label
+    /// for the LocalLabelVal and adds it to the map if needed.
+    unsigned GetInstance(int64_t LocalLabelVal);
     
     /// Allocator - Allocator object used for creating machine code objects.
     ///
@@ -50,7 +60,7 @@ namespace llvm {
     /// objects.
     BumpPtrAllocator Allocator;
     
-    void *MachOUniquingMap, *ELFUniquingMap;
+    void *MachOUniquingMap, *ELFUniquingMap, *COFFUniquingMap;
   public:
     explicit MCContext(const MCAsmInfo &MAI);
     ~MCContext();
@@ -64,6 +74,14 @@ namespace llvm {
     /// with a unique but unspecified name.
     MCSymbol *CreateTempSymbol();
 
+    /// CreateDirectionalLocalSymbol - Create the defintion of a directional
+    /// local symbol for numbered label (used for "1:" defintions).
+    MCSymbol *CreateDirectionalLocalSymbol(int64_t LocalLabelVal);
+
+    /// GetDirectionalLocalSymbol - Create and return a directional local
+    /// symbol for numbered label (used for "1b" or 1f" references).
+    MCSymbol *GetDirectionalLocalSymbol(int64_t LocalLabelVal, int bORf);
+
     /// GetOrCreateSymbol - Lookup the symbol inside with the specified
     /// @p Name.  If it exists, return it.  If not, create a forward
     /// reference and return it.
@@ -97,6 +115,15 @@ namespace llvm {
     const MCSection *getELFSection(StringRef Section, unsigned Type,
                                    unsigned Flags, SectionKind Kind,
                                    bool IsExplicit = false);
+
+    const MCSection *getCOFFSection(StringRef Section, unsigned Characteristics,
+                                    int Selection, SectionKind Kind);
+
+    const MCSection *getCOFFSection(StringRef Section, unsigned Characteristics,
+                                    SectionKind Kind) {
+      return getCOFFSection (Section, Characteristics, 0, Kind);
+    }
+
     
     /// @}
 
diff --git a/include/llvm/MC/MCExpr.h b/include/llvm/MC/MCExpr.h
index bd0684d..1f9b8f2 100644
--- a/include/llvm/MC/MCExpr.h
+++ b/include/llvm/MC/MCExpr.h
@@ -134,7 +134,10 @@ public:
     VK_NTPOFF,
     VK_PLT,
     VK_TLSGD,
-    VK_TPOFF
+    VK_TPOFF,
+    VK_ARM_HI16, // The R_ARM_MOVT_ABS relocation (:upper16: in the asm file)
+    VK_ARM_LO16, // The R_ARM_MOVW_ABS_NC relocation (:lower16: in the asm file)
+    VK_TLVP // Mach-O thread local variable relocation
   };
 
 private:
diff --git a/include/llvm/MC/MCFixup.h b/include/llvm/MC/MCFixup.h
index cd0dd19..eed4c34 100644
--- a/include/llvm/MC/MCFixup.h
+++ b/include/llvm/MC/MCFixup.h
@@ -10,26 +10,12 @@
 #ifndef LLVM_MC_MCFIXUP_H
 #define LLVM_MC_MCFIXUP_H
 
+#include "llvm/System/DataTypes.h"
 #include <cassert>
 
 namespace llvm {
 class MCExpr;
 
-// Private constants, do not use.
-//
-// This is currently laid out so that the MCFixup fields can be efficiently
-// accessed, while keeping the offset field large enough that the assembler
-// backend can reasonably use the MCFixup representation for an entire fragment
-// (splitting any overly large fragments).
-//
-// The division of bits between the kind and the opindex can be tweaked if we
-// end up needing more bits for target dependent kinds.
-enum {
-  MCFIXUP_NUM_GENERIC_KINDS = 128,
-  MCFIXUP_NUM_KIND_BITS = 16,
-  MCFIXUP_NUM_OFFSET_BITS = (32 - MCFIXUP_NUM_KIND_BITS)
-};
-
 /// MCFixupKind - Extensible enumeration to represent the type of a fixup.
 enum MCFixupKind {
   FK_Data_1 = 0, ///< A one-byte fixup.
@@ -37,12 +23,14 @@ enum MCFixupKind {
   FK_Data_4,     ///< A four-byte fixup.
   FK_Data_8,     ///< A eight-byte fixup.
 
-  FirstTargetFixupKind = MCFIXUP_NUM_GENERIC_KINDS,
+  FirstTargetFixupKind = 128,
 
-  MaxTargetFixupKind = (1 << MCFIXUP_NUM_KIND_BITS)
+  // Limit range of target fixups, in case we want to pack more efficiently
+  // later.
+  MaxTargetFixupKind = (1 << 8)
 };
 
-/// MCFixup - Encode information on a single operation to perform on an byte
+/// MCFixup - Encode information on a single operation to perform on a byte
 /// sequence (e.g., an encoded instruction) which requires assemble- or run-
 /// time patching.
 ///
@@ -57,36 +45,33 @@ enum MCFixupKind {
 /// fixups become relocations in the object file (or errors, if the fixup cannot
 /// be encoded on the target).
 class MCFixup {
-  static const unsigned MaxOffset = 1 << MCFIXUP_NUM_KIND_BITS;
-
   /// The value to put into the fixup location. The exact interpretation of the
-  /// expression is target dependent, usually it will one of the operands to an
-  /// instruction or an assembler directive.
+  /// expression is target dependent, usually it will be one of the operands to
+  /// an instruction or an assembler directive.
   const MCExpr *Value;
 
   /// The byte index of start of the relocation inside the encoded instruction.
-  unsigned Offset : MCFIXUP_NUM_OFFSET_BITS;
+  uint32_t Offset;
 
   /// The target dependent kind of fixup item this is. The kind is used to
   /// determine how the operand value should be encoded into the instruction.
-  unsigned Kind : MCFIXUP_NUM_KIND_BITS;
+  unsigned Kind;
 
 public:
-  static MCFixup Create(unsigned Offset, const MCExpr *Value,
+  static MCFixup Create(uint32_t Offset, const MCExpr *Value,
                         MCFixupKind Kind) {
+    assert(unsigned(Kind) < MaxTargetFixupKind && "Kind out of range!");
     MCFixup FI;
     FI.Value = Value;
     FI.Offset = Offset;
     FI.Kind = unsigned(Kind);
-
-    assert(Offset == FI.getOffset() && "Offset out of range!");
-    assert(Kind == FI.getKind() && "Kind out of range!");
     return FI;
   }
 
   MCFixupKind getKind() const { return MCFixupKind(Kind); }
 
-  unsigned getOffset() const { return Offset; }
+  uint32_t getOffset() const { return Offset; }
+  void setOffset(uint32_t Value) { Offset = Value; }
 
   const MCExpr *getValue() const { return Value; }
 
diff --git a/include/llvm/MC/MCLabel.h b/include/llvm/MC/MCLabel.h
new file mode 100644
index 0000000..727520d
--- /dev/null
+++ b/include/llvm/MC/MCLabel.h
@@ -0,0 +1,56 @@
+//===- MCLabel.h - Machine Code Directional Local Labels --------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the MCLabel class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCLABEL_H
+#define LLVM_MC_MCLABEL_H
+
+namespace llvm {
+  class MCContext;
+  class raw_ostream;
+
+  /// MCLabel - Instances of this class represent a label name in the MC file,
+  /// and MCLabel are created and unique'd by the MCContext class.  MCLabel
+  /// should only be constructed for valid instances in the object file.
+  class MCLabel {
+    // Instance - the instance number of this Directional Local Label
+    unsigned Instance;
+
+  private:  // MCContext creates and uniques these.
+    friend class MCContext;
+    MCLabel(unsigned instance)
+      : Instance(instance) {}
+
+    MCLabel(const MCLabel&);       // DO NOT IMPLEMENT
+    void operator=(const MCLabel&); // DO NOT IMPLEMENT
+  public:
+    /// getInstance - Get the current instance of this Directional Local Label.
+    unsigned getInstance() const { return Instance; }
+
+    /// incInstance - Increment the current instance of this Directional Local
+    /// Label.
+    unsigned incInstance() { return ++Instance; }
+
+    /// print - Print the value to the stream \arg OS.
+    void print(raw_ostream &OS) const;
+
+    /// dump - Print the value to stderr.
+    void dump() const;
+  };
+
+  inline raw_ostream &operator<<(raw_ostream &OS, const MCLabel &Label) {
+    Label.print(OS);
+    return OS;
+  }
+} // end namespace llvm
+
+#endif
diff --git a/include/llvm/MC/MCMachOSymbolFlags.h b/include/llvm/MC/MCMachOSymbolFlags.h
new file mode 100644
index 0000000..c938c81
--- /dev/null
+++ b/include/llvm/MC/MCMachOSymbolFlags.h
@@ -0,0 +1,44 @@
+//===- MCMachOSymbolFlags.h - MachO Symbol Flags ----------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the SymbolFlags used for the MachO target.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCMACHOSYMBOLFLAGS_H
+#define LLVM_MC_MCMACHOSYMBOLFLAGS_H
+
+// These flags are mostly used in MCMachOStreamer.cpp but also needed in
+// MachObjectWriter.cpp to test for Weak Definitions of symbols to emit
+// the correct relocation information.
+
+namespace llvm {
+  /// SymbolFlags - We store the value for the 'desc' symbol field in the lowest
+  /// 16 bits of the implementation defined flags.
+  enum SymbolFlags { // See <mach-o/nlist.h>.
+    SF_DescFlagsMask                        = 0xFFFF,
+
+    // Reference type flags.
+    SF_ReferenceTypeMask                    = 0x0007,
+    SF_ReferenceTypeUndefinedNonLazy        = 0x0000,
+    SF_ReferenceTypeUndefinedLazy           = 0x0001,
+    SF_ReferenceTypeDefined                 = 0x0002,
+    SF_ReferenceTypePrivateDefined          = 0x0003,
+    SF_ReferenceTypePrivateUndefinedNonLazy = 0x0004,
+    SF_ReferenceTypePrivateUndefinedLazy    = 0x0005,
+
+    // Other 'desc' flags.
+    SF_NoDeadStrip                          = 0x0020,
+    SF_WeakReference                        = 0x0040,
+    SF_WeakDefinition                       = 0x0080
+  };
+
+} // end namespace llvm
+
+#endif
diff --git a/include/llvm/MC/MCObjectWriter.h b/include/llvm/MC/MCObjectWriter.h
index 522ee77..e900584 100644
--- a/include/llvm/MC/MCObjectWriter.h
+++ b/include/llvm/MC/MCObjectWriter.h
@@ -15,9 +15,9 @@
 #include <cassert>
 
 namespace llvm {
-class MCAsmFixup;
 class MCAsmLayout;
 class MCAssembler;
+class MCFixup;
 class MCFragment;
 class MCValue;
 class raw_ostream;
@@ -72,13 +72,13 @@ public:
   virtual void RecordRelocation(const MCAssembler &Asm,
                                 const MCAsmLayout &Layout,
                                 const MCFragment *Fragment,
-                                const MCAsmFixup &Fixup, MCValue Target,
+                                const MCFixup &Fixup, MCValue Target,
                                 uint64_t &FixedValue) = 0;
 
   /// Write the object file.
   ///
   /// This routine is called by the assembler after layout and relaxation is
-  /// complete, fixups have been evaluate and applied, and relocations
+  /// complete, fixups have been evaluated and applied, and relocations
   /// generated.
   virtual void WriteObject(const MCAssembler &Asm,
                            const MCAsmLayout &Layout) = 0;
@@ -152,6 +152,8 @@ public:
   }
 
   void WriteBytes(StringRef Str, unsigned ZeroFillSize = 0) {
+    assert((ZeroFillSize == 0 || Str.size () <= ZeroFillSize) &&
+      "data size greater than fill size, unexpected large write will occur");
     OS << Str;
     if (ZeroFillSize)
       WriteZeros(ZeroFillSize - Str.size());
diff --git a/include/llvm/MC/MCParser/AsmParser.h b/include/llvm/MC/MCParser/AsmParser.h
index 7a78906..e929fd1 100644
--- a/include/llvm/MC/MCParser/AsmParser.h
+++ b/include/llvm/MC/MCParser/AsmParser.h
@@ -131,11 +131,13 @@ private:
   /// ParseDirectiveSymbolAttribute - Parse a directive like ".globl" which
   /// accepts a single symbol (which should be a label or an external).
   bool ParseDirectiveSymbolAttribute(MCSymbolAttr Attr);
+  bool ParseDirectiveELFType(); // ELF specific ".type"
   bool ParseDirectiveDarwinSymbolDesc(); // Darwin specific ".desc"
   bool ParseDirectiveDarwinLsym(); // Darwin specific ".lsym"
 
   bool ParseDirectiveComm(bool IsLocal); // ".comm" and ".lcomm"
   bool ParseDirectiveDarwinZerofill(); // Darwin specific ".zerofill"
+  bool ParseDirectiveDarwinTBSS(); // Darwin specific ".tbss"
 
   // Darwin specific ".subsections_via_symbols"
   bool ParseDirectiveDarwinSubsectionsViaSymbols();
diff --git a/include/llvm/MC/MCParser/MCAsmLexer.h b/include/llvm/MC/MCParser/MCAsmLexer.h
index 075b69b..bd1496f 100644
--- a/include/llvm/MC/MCParser/MCAsmLexer.h
+++ b/include/llvm/MC/MCParser/MCAsmLexer.h
@@ -47,7 +47,7 @@ public:
     Pipe, PipePipe, Caret, 
     Amp, AmpAmp, Exclaim, ExclaimEqual, Percent, Hash,
     Less, LessEqual, LessLess, LessGreater,
-    Greater, GreaterEqual, GreaterGreater
+    Greater, GreaterEqual, GreaterGreater, At
   };
 
   TokenKind Kind;
diff --git a/include/llvm/MC/MCSection.h b/include/llvm/MC/MCSection.h
index e9c19fc..808767c 100644
--- a/include/llvm/MC/MCSection.h
+++ b/include/llvm/MC/MCSection.h
@@ -17,6 +17,7 @@
 #include <string>
 #include "llvm/ADT/StringRef.h"
 #include "llvm/MC/SectionKind.h"
+#include "llvm/Support/Casting.h"
 
 namespace llvm {
   class MCContext;
@@ -27,15 +28,27 @@ namespace llvm {
   /// section in the current translation unit.  The MCContext class uniques and
   /// creates these.
   class MCSection {
+  public:
+    enum SectionVariant {
+      SV_COFF = 0,
+      SV_ELF,
+      SV_MachO,
+      SV_PIC16
+    };
+
+  private:
     MCSection(const MCSection&);      // DO NOT IMPLEMENT
     void operator=(const MCSection&); // DO NOT IMPLEMENT
   protected:
-    MCSection(SectionKind K) : Kind(K) {}
+    MCSection(SectionVariant V, SectionKind K) : Variant(V), Kind(K) {}
+    SectionVariant Variant;
     SectionKind Kind;
   public:
     virtual ~MCSection();
 
     SectionKind getKind() const { return Kind; }
+
+    SectionVariant getVariant() const { return Variant; }
     
     virtual void PrintSwitchToSection(const MCAsmInfo &MAI,
                                       raw_ostream &OS) const = 0;
@@ -47,32 +60,8 @@ namespace llvm {
     virtual bool isBaseAddressKnownZero() const {
       return false;
     }
-  };
-
-  class MCSectionCOFF : public MCSection {
-    // The memory for this string is stored in the same MCContext as *this.
-    StringRef Name;
-    
-    /// IsDirective - This is true if the section name is a directive, not
-    /// something that should be printed with ".section".
-    ///
-    /// FIXME: This is a hack.  Switch to a semantic view of the section instead
-    /// of a syntactic one.
-    bool IsDirective;
-    
-    MCSectionCOFF(StringRef name, bool isDirective, SectionKind K)
-      : MCSection(K), Name(name), IsDirective(isDirective) {
-    }
-  public:
-    
-    static MCSectionCOFF *Create(StringRef Name, bool IsDirective, 
-                                 SectionKind K, MCContext &Ctx);
 
-    StringRef getName() const { return Name; }
-    bool isDirective() const { return IsDirective; }
-    
-    virtual void PrintSwitchToSection(const MCAsmInfo &MAI,
-                                      raw_ostream &OS) const;
+    static bool classof(const MCSection *) { return true; }
   };
   
 } // end namespace llvm
diff --git a/include/llvm/MC/MCSectionCOFF.h b/include/llvm/MC/MCSectionCOFF.h
new file mode 100644
index 0000000..938a388
--- /dev/null
+++ b/include/llvm/MC/MCSectionCOFF.h
@@ -0,0 +1,115 @@
+//===- MCSectionCOFF.h - COFF Machine Code Sections -------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the MCSectionCOFF class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCSECTIONCOFF_H
+#define LLVM_MC_MCSECTIONCOFF_H
+
+#include "llvm/MC/MCSection.h"
+
+namespace llvm {
+  
+/// MCSectionCOFF - This represents a section on Windows
+  class MCSectionCOFF : public MCSection {
+    // The memory for this string is stored in the same MCContext as *this.
+    StringRef SectionName;
+    
+    /// Characteristics - This is the Characteristics field of a section,
+    //  drawn from the enums below.
+    unsigned Characteristics;
+
+    /// Selection - This is the Selection field for the section symbol, if
+    /// it is a COMDAT section (Characteristics & IMAGE_SCN_LNK_COMDAT) != 0
+    int Selection;
+
+  private:
+    friend class MCContext;
+    MCSectionCOFF(StringRef Section, unsigned Characteristics,
+                  int Selection, SectionKind K)
+      : MCSection(SV_COFF, K), SectionName(Section),
+        Characteristics(Characteristics), Selection (Selection) {
+      assert ((Characteristics & 0x00F00000) == 0 &&
+        "alignment must not be set upon section creation");
+    }
+    ~MCSectionCOFF();
+
+  public:
+    /// ShouldOmitSectionDirective - Decides whether a '.section' directive
+    /// should be printed before the section name
+    bool ShouldOmitSectionDirective(StringRef Name, const MCAsmInfo &MAI) const;
+
+    //FIXME: all COFF enumerations/flags should be standardized into one place...
+    // Target/X86COFF.h doesn't seem right as COFF can be used for other targets,
+    // MC/WinCOFF.h maybe right as it isn't target or entity specific, and it is
+    //   pretty low on the dependancy graph (is there any need to support non
+    //   windows COFF?)
+    // here is good for section stuff, but others should go elsewhere
+
+    /// Valid section flags.
+    enum {
+      IMAGE_SCN_TYPE_NO_PAD                     = 0x00000008,
+      IMAGE_SCN_CNT_CODE                        = 0x00000020,
+      IMAGE_SCN_CNT_INITIALIZED_DATA            = 0x00000040,
+      IMAGE_SCN_CNT_UNINITIALIZED_DATA          = 0x00000080,
+      IMAGE_SCN_LNK_OTHER                       = 0x00000100,
+      IMAGE_SCN_LNK_INFO                        = 0x00000200,
+      IMAGE_SCN_LNK_REMOVE                      = 0x00000800,
+      IMAGE_SCN_LNK_COMDAT                      = 0x00001000,
+      IMAGE_SCN_MEM_FARDATA                     = 0x00008000,
+      IMAGE_SCN_MEM_PURGEABLE                   = 0x00020000,
+      IMAGE_SCN_MEM_16BIT                       = 0x00020000,
+      IMAGE_SCN_MEM_LOCKED                      = 0x00040000,
+      IMAGE_SCN_MEM_PRELOAD                     = 0x00080000,
+      /* these are handled elsewhere
+      IMAGE_SCN_ALIGN_1BYTES                    = 0x00100000,
+      IMAGE_SCN_ALIGN_2BYTES                    = 0x00200000,
+      IMAGE_SCN_ALIGN_4BYTES                    = 0x00300000,
+      IMAGE_SCN_ALIGN_8BYTES                    = 0x00400000,
+      IMAGE_SCN_ALIGN_16BYTES                   = 0x00500000,
+      IMAGE_SCN_ALIGN_32BYTES                   = 0x00600000,
+      IMAGE_SCN_ALIGN_64BYTES                   = 0x00700000,
+      */
+      IMAGE_SCN_LNK_NRELOC_OVFL                 = 0x01000000,
+      IMAGE_SCN_MEM_DISCARDABLE                 = 0x02000000,
+      IMAGE_SCN_MEM_NOT_CACHED                  = 0x04000000,
+      IMAGE_SCN_MEM_NOT_PAGED                   = 0x08000000,
+      IMAGE_SCN_MEM_SHARED                      = 0x10000000,
+      IMAGE_SCN_MEM_EXECUTE                     = 0x20000000,
+      IMAGE_SCN_MEM_READ                        = 0x40000000,
+      IMAGE_SCN_MEM_WRITE                       = 0x80000000
+    };
+
+    enum {
+      IMAGE_COMDAT_SELECT_NODUPLICATES = 1,
+      IMAGE_COMDAT_SELECT_ANY,
+      IMAGE_COMDAT_SELECT_SAME_SIZE,
+      IMAGE_COMDAT_SELECT_EXACT_MATCH,
+      IMAGE_COMDAT_SELECT_ASSOCIATIVE,
+      IMAGE_COMDAT_SELECT_LARGEST
+    };
+
+    StringRef getSectionName() const { return SectionName; }
+    unsigned getCharacteristics() const { return Characteristics; }
+    int getSelection () const { return Selection; }
+    
+    virtual void PrintSwitchToSection(const MCAsmInfo &MAI,
+                                      raw_ostream &OS) const;
+
+    static bool classof(const MCSection *S) {
+      return S->getVariant() == SV_COFF;
+    }
+    static bool classof(const MCSectionCOFF *) { return true; }
+  };
+
+} // end namespace llvm
+
+#endif
diff --git a/include/llvm/MC/MCSectionELF.h b/include/llvm/MC/MCSectionELF.h
index 7054668..5fe8171 100644
--- a/include/llvm/MC/MCSectionELF.h
+++ b/include/llvm/MC/MCSectionELF.h
@@ -40,7 +40,7 @@ private:
   friend class MCContext;
   MCSectionELF(StringRef Section, unsigned type, unsigned flags,
                SectionKind K, bool isExplicit)
-    : MCSection(K), SectionName(Section), Type(type), Flags(flags), 
+    : MCSection(SV_ELF, K), SectionName(Section), Type(type), Flags(flags),
       IsExplicit(isExplicit) {}
   ~MCSectionELF();
 public:
@@ -178,6 +178,11 @@ public:
   virtual bool isBaseAddressKnownZero() const {
     return (getFlags() & SHF_ALLOC) == 0;
   }
+
+  static bool classof(const MCSection *S) {
+    return S->getVariant() == SV_ELF;
+  }
+  static bool classof(const MCSectionELF *) { return true; }
 };
 
 } // end namespace llvm
diff --git a/include/llvm/MC/MCSectionMachO.h b/include/llvm/MC/MCSectionMachO.h
index f3bc8ed..2d9d133 100644
--- a/include/llvm/MC/MCSectionMachO.h
+++ b/include/llvm/MC/MCSectionMachO.h
@@ -87,8 +87,20 @@ public:
     /// S_LAZY_DYLIB_SYMBOL_POINTERS - Section with lazy symbol pointers to
     /// lazy loaded dylibs.
     S_LAZY_DYLIB_SYMBOL_POINTERS = 0x10U,
+    /// S_THREAD_LOCAL_REGULAR - Section with ....
+    S_THREAD_LOCAL_REGULAR = 0x11U,
+    /// S_THREAD_LOCAL_ZEROFILL - Thread local zerofill section.
+    S_THREAD_LOCAL_ZEROFILL = 0x12U,
+    /// S_THREAD_LOCAL_VARIABLES - Section with thread local variable structure
+    /// data.
+    S_THREAD_LOCAL_VARIABLES = 0x13U,
+    /// S_THREAD_LOCAL_VARIABLE_POINTERS - Section with ....
+    S_THREAD_LOCAL_VARIABLE_POINTERS = 0x14U,
+    /// S_THREAD_LOCAL_INIT_FUNCTION_POINTERS - Section with thread local
+    /// variable initialization pointers to functions.
+    S_THREAD_LOCAL_INIT_FUNCTION_POINTERS = 0x15U,
 
-    LAST_KNOWN_SECTION_TYPE = S_LAZY_DYLIB_SYMBOL_POINTERS,
+    LAST_KNOWN_SECTION_TYPE = S_THREAD_LOCAL_INIT_FUNCTION_POINTERS,
     
 
     // Valid section attributes.
@@ -136,6 +148,9 @@ public:
   unsigned getStubSize() const { return Reserved2; }
 
   unsigned getType() const { return TypeAndAttributes & SECTION_TYPE; }
+  bool hasAttribute(unsigned Value) const {
+    return (TypeAndAttributes & Value) != 0;
+  }
 
   /// ParseSectionSpecifier - Parse the section specifier indicated by "Spec".
   /// This is a string that can appear after a .section directive in a mach-o
@@ -150,6 +165,11 @@ public:
 
   virtual void PrintSwitchToSection(const MCAsmInfo &MAI,
                                     raw_ostream &OS) const;
+
+  static bool classof(const MCSection *S) {
+    return S->getVariant() == SV_MachO;
+  }
+  static bool classof(const MCSectionMachO *) { return true; }
 };
 
 } // end namespace llvm
diff --git a/include/llvm/MC/MCStreamer.h b/include/llvm/MC/MCStreamer.h
index 4667c41..0783159 100644
--- a/include/llvm/MC/MCStreamer.h
+++ b/include/llvm/MC/MCStreamer.h
@@ -27,7 +27,7 @@ namespace llvm {
   class MCSection;
   class MCSymbol;
   class StringRef;
-class TargetAsmBackend;
+  class TargetAsmBackend;
   class Twine;
   class raw_ostream;
   class formatted_raw_ostream;
@@ -138,7 +138,24 @@ class TargetAsmBackend;
     /// @param DescValue - The value to set into the n_desc field.
     virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) = 0;
 
-    
+    /// BeginCOFFSymbolDef - Start emitting COFF symbol definition
+    ///
+    /// @param Symbol - The symbol to have its External & Type fields set.
+    virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol) = 0;
+
+    /// EmitCOFFSymbolStorageClass - Emit the storage class of the symbol.
+    ///
+    /// @param StorageClass - The storage class the symbol should have.
+    virtual void EmitCOFFSymbolStorageClass(int StorageClass) = 0;
+
+    /// EmitCOFFSymbolType - Emit the type of the symbol.
+    ///
+    /// @param Type - A COFF type identifier (see COFF::SymbolType in X86COFF.h)
+    virtual void EmitCOFFSymbolType(int Type) = 0;
+
+    /// EndCOFFSymbolDef - Marks the end of the symbol definition.
+    virtual void EndCOFFSymbolDef() = 0;
+
     /// EmitELFSize - Emit an ELF .size directive.
     ///
     /// This corresponds to an assembler statement such as:
@@ -161,7 +178,7 @@ class TargetAsmBackend;
     /// @param Size - The size of the common symbol.
     virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size) = 0;
     
-    /// EmitZerofill - Emit a the zerofill section and an option symbol.
+    /// EmitZerofill - Emit the zerofill section and an optional symbol.
     ///
     /// @param Section - The zerofill section to create and or to put the symbol
     /// @param Symbol - The zerofill symbol to emit, if non-NULL.
@@ -171,6 +188,15 @@ class TargetAsmBackend;
     virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0,
                               unsigned Size = 0,unsigned ByteAlignment = 0) = 0;
 
+    /// EmitTBSSSymbol - Emit a thread local bss (.tbss) symbol.
+    ///
+    /// @param Section - The thread local common section.
+    /// @param Symbol - The thread local common symbol to emit.
+    /// @param Size - The size of the symbol.
+    /// @param ByteAlignment - The alignment of the thread local common symbol
+    /// if non-zero.  This must be a power of 2 on some targets.
+    virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
+                                uint64_t Size, unsigned ByteAlignment = 0) = 0;                                
     /// @}
     /// @name Generating Data
     /// @{
@@ -315,12 +341,18 @@ class TargetAsmBackend;
                                 MCCodeEmitter *CE = 0,
                                 bool ShowInst = false);
 
-  /// createMachOStream - Create a machine code streamer which will generative
+  /// createMachOStreamer - Create a machine code streamer which will generative
   /// Mach-O format object files.
   MCStreamer *createMachOStreamer(MCContext &Ctx, TargetAsmBackend &TAB,
                                   raw_ostream &OS, MCCodeEmitter *CE,
                                   bool RelaxAll = false);
 
+  /// createLoggingStreamer - Create a machine code streamer which just logs the
+  /// API calls and then dispatches to another streamer.
+  ///
+  /// The new streamer takes ownership of the \arg Child.
+  MCStreamer *createLoggingStreamer(MCStreamer *Child, raw_ostream &OS);
+
 } // end namespace llvm
 
 #endif
diff --git a/include/llvm/MC/MCSymbol.h b/include/llvm/MC/MCSymbol.h
index fb96506..1b432c2 100644
--- a/include/llvm/MC/MCSymbol.h
+++ b/include/llvm/MC/MCSymbol.h
@@ -52,10 +52,15 @@ namespace llvm {
     /// "Lfoo" or ".foo".
     unsigned IsTemporary : 1;
 
+    /// IsUsedInExpr - True if this symbol has been used in an expression and
+    /// cannot be redefined.
+    unsigned IsUsedInExpr : 1;
+
   private:  // MCContext creates and uniques these.
     friend class MCContext;
     MCSymbol(StringRef name, bool isTemporary)
-      : Name(name), Section(0), Value(0), IsTemporary(isTemporary) {}
+      : Name(name), Section(0), Value(0),
+        IsTemporary(isTemporary), IsUsedInExpr(false) {}
 
     MCSymbol(const MCSymbol&);       // DO NOT IMPLEMENT
     void operator=(const MCSymbol&); // DO NOT IMPLEMENT
@@ -63,13 +68,15 @@ namespace llvm {
     /// getName - Get the symbol name.
     StringRef getName() const { return Name; }
 
-    /// @name Symbol Type
+    /// @name Accessors
     /// @{
 
     /// isTemporary - Check if this is an assembler temporary symbol.
-    bool isTemporary() const {
-      return IsTemporary;
-    }
+    bool isTemporary() const { return IsTemporary; }
+
+    /// isUsedInExpr - Check if this is an assembler temporary symbol.
+    bool isUsedInExpr() const { return IsUsedInExpr; }
+    void setUsedInExpr(bool Value) { IsUsedInExpr = Value; }
 
     /// @}
     /// @name Associated Sections
@@ -125,14 +132,14 @@ namespace llvm {
       return Value != 0;
     }
 
-    /// getValue() - Get the value for variable symbols, or null if the symbol
-    /// is not a variable.
-    const MCExpr *getValue() const { return Value; }
-
-    void setValue(const MCExpr *Value) {
-      this->Value = Value;
+    /// getValue() - Get the value for variable symbols.
+    const MCExpr *getVariableValue() const {
+      assert(isVariable() && "Invalid accessor!");
+      return Value;
     }
 
+    void setVariableValue(const MCExpr *Value);
+
     /// @}
 
     /// print - Print the value to the stream \arg OS.
diff --git a/include/llvm/MC/MachObjectWriter.h b/include/llvm/MC/MachObjectWriter.h
index 844025d..9b1ff1d 100644
--- a/include/llvm/MC/MachObjectWriter.h
+++ b/include/llvm/MC/MachObjectWriter.h
@@ -15,9 +15,9 @@
 #include <cassert>
 
 namespace llvm {
-class MCAsmFixup;
 class MCAssembler;
 class MCFragment;
+class MCFixup;
 class MCValue;
 class raw_ostream;
 
@@ -33,7 +33,7 @@ public:
   virtual void RecordRelocation(const MCAssembler &Asm,
                                 const MCAsmLayout &Layout,
                                 const MCFragment *Fragment,
-                                const MCAsmFixup &Fixup, MCValue Target,
+                                const MCFixup &Fixup, MCValue Target,
                                 uint64_t &FixedValue);
 
   virtual void WriteObject(const MCAssembler &Asm, const MCAsmLayout &Layout);
diff --git a/include/llvm/PassManager.h b/include/llvm/PassManager.h
index 4d91163..8de0f83 100644
--- a/include/llvm/PassManager.h
+++ b/include/llvm/PassManager.h
@@ -60,6 +60,9 @@ public:
   bool run(Module &M);
 
 private:
+  /// addImpl - Add a pass to the queue of passes to run, without
+  /// checking whether to add a printer pass.
+  void addImpl(Pass *P);
 
   /// PassManagerImpl_New is the actual class. PassManager is just the 
   /// wraper to publish simple pass manager interface
@@ -96,6 +99,10 @@ public:
   bool doFinalization();
   
 private:
+  /// addImpl - Add a pass to the queue of passes to run, without
+  /// checking whether to add a printer pass.
+  void addImpl(Pass *P);
+
   FunctionPassManagerImpl *FPM;
   Module *M;
 };
diff --git a/include/llvm/Support/Compiler.h b/include/llvm/Support/Compiler.h
index 881a0fe..b2ce76d 100644
--- a/include/llvm/Support/Compiler.h
+++ b/include/llvm/Support/Compiler.h
@@ -15,12 +15,21 @@
 #ifndef LLVM_SUPPORT_COMPILER_H
 #define LLVM_SUPPORT_COMPILER_H
 
-// The VISIBILITY_HIDDEN macro, used for marking classes with the GCC-specific
-// visibility("hidden") attribute.
+/// LLVM_LIBRARY_VISIBILITY - If a class marked with this attribute is linked
+/// into a shared library, then the class should be private to the library and
+/// not accessible from outside it.  Can also be used to mark variables and
+/// functions, making them private to any shared library they are linked into.
+
+/// LLVM_GLOBAL_VISIBILITY - If a class marked with this attribute is linked
+/// into a shared library, then the class will be accessible from outside the
+/// the library.  Can also be used to mark variables and functions, making them
+/// accessible from outside any shared library they are linked into.
 #if (__GNUC__ >= 4) && !defined(__MINGW32__) && !defined(__CYGWIN__)
-#define VISIBILITY_HIDDEN __attribute__ ((visibility("hidden")))
+#define LLVM_LIBRARY_VISIBILITY __attribute__ ((visibility("hidden")))
+#define LLVM_GLOBAL_VISIBILITY __attribute__ ((visibility("default")))
 #else
-#define VISIBILITY_HIDDEN
+#define LLVM_LIBRARY_VISIBILITY
+#define LLVM_GLOBAL_VISIBILITY
 #endif
 
 #if (__GNUC__ >= 4 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
diff --git a/include/llvm/Support/DOTGraphTraits.h b/include/llvm/Support/DOTGraphTraits.h
index 54ced15..796c74a 100644
--- a/include/llvm/Support/DOTGraphTraits.h
+++ b/include/llvm/Support/DOTGraphTraits.h
@@ -36,7 +36,7 @@ protected:
   }
 
 public:
-  DefaultDOTGraphTraits (bool simple=false) : IsSimple (simple) {}
+  explicit DefaultDOTGraphTraits(bool simple=false) : IsSimple (simple) {}
 
   /// getGraphName - Return the label for the graph as a whole.  Printed at the
   /// top of the graph.
@@ -59,6 +59,12 @@ public:
     return false;
   }
 
+  /// isNodeHidden - If the function returns true, the given node is not
+  /// displayed in the graph.
+  static bool isNodeHidden(const void *Node) {
+    return false;
+  }
+
   /// getNodeLabel - Given a node and a pointer to the top level graph, return
   /// the label to print in the node.
   template<typename GraphType>
diff --git a/include/llvm/Support/GraphWriter.h b/include/llvm/Support/GraphWriter.h
index 13e6682..559f004 100644
--- a/include/llvm/Support/GraphWriter.h
+++ b/include/llvm/Support/GraphWriter.h
@@ -122,7 +122,20 @@ public:
     // Loop over the graph, printing it out...
     for (node_iterator I = GTraits::nodes_begin(G), E = GTraits::nodes_end(G);
          I != E; ++I)
-      writeNode(*I);
+      if (!isNodeHidden(*I))
+        writeNode(*I);
+  }
+
+  bool isNodeHidden(NodeType &Node) {
+    return isNodeHidden(&Node);
+  }
+
+  bool isNodeHidden(NodeType *const *Node) {
+    return isNodeHidden(*Node);
+  }
+
+  bool isNodeHidden(NodeType *Node) {
+    return DTraits.isNodeHidden(Node);
   }
 
   void writeNode(NodeType& Node) {
@@ -189,9 +202,11 @@ public:
     child_iterator EI = GTraits::child_begin(Node);
     child_iterator EE = GTraits::child_end(Node);
     for (unsigned i = 0; EI != EE && i != 64; ++EI, ++i)
-      writeEdge(Node, i, EI);
+      if (!DTraits.isNodeHidden(*EI))
+        writeEdge(Node, i, EI);
     for (; EI != EE; ++EI)
-      writeEdge(Node, 64, EI);
+      if (!DTraits.isNodeHidden(*EI))
+        writeEdge(Node, 64, EI);
   }
 
   void writeEdge(NodeType *Node, unsigned edgeidx, child_iterator EI) {
diff --git a/include/llvm/Support/StandardPasses.h b/include/llvm/Support/StandardPasses.h
index f233c18..b97ad29 100644
--- a/include/llvm/Support/StandardPasses.h
+++ b/include/llvm/Support/StandardPasses.h
@@ -31,7 +31,7 @@ namespace llvm {
   ///
   /// \arg OptimizationLevel - The optimization level, corresponding to -O0,
   /// -O1, etc.
-  static inline void createStandardFunctionPasses(FunctionPassManager *PM,
+  static inline void createStandardFunctionPasses(PassManagerBase *PM,
                                                   unsigned OptimizationLevel);
 
   /// createStandardModulePasses - Add the standard list of module passes to the
@@ -46,7 +46,7 @@ namespace llvm {
   /// \arg HaveExceptions - Whether the module may have code using exceptions.
   /// \arg InliningPass - The inlining pass to use, if any, or null. This will
   /// always be added, even at -O0.a
-  static inline void createStandardModulePasses(PassManager *PM,
+  static inline void createStandardModulePasses(PassManagerBase *PM,
                                                 unsigned OptimizationLevel,
                                                 bool OptimizeSize,
                                                 bool UnitAtATime,
@@ -61,14 +61,14 @@ namespace llvm {
   /// Internalize - Run the internalize pass.
   /// RunInliner - Use a function inlining pass.
   /// VerifyEach - Run the verifier after each pass.
-  static inline void createStandardLTOPasses(PassManager *PM,
+  static inline void createStandardLTOPasses(PassManagerBase *PM,
                                              bool Internalize,
                                              bool RunInliner,
                                              bool VerifyEach);
 
   // Implementations
 
-  static inline void createStandardFunctionPasses(FunctionPassManager *PM,
+  static inline void createStandardFunctionPasses(PassManagerBase *PM,
                                                   unsigned OptimizationLevel) {
     if (OptimizationLevel > 0) {
       PM->add(createCFGSimplificationPass());
@@ -82,7 +82,7 @@ namespace llvm {
 
   /// createStandardModulePasses - Add the standard module passes.  This is
   /// expected to be run after the standard function passes.
-  static inline void createStandardModulePasses(PassManager *PM,
+  static inline void createStandardModulePasses(PassManagerBase *PM,
                                                 unsigned OptimizationLevel,
                                                 bool OptimizeSize,
                                                 bool UnitAtATime,
@@ -164,14 +164,14 @@ namespace llvm {
     }
   }
 
-  static inline void addOnePass(PassManager *PM, Pass *P, bool AndVerify) {
+  static inline void addOnePass(PassManagerBase *PM, Pass *P, bool AndVerify) {
     PM->add(P);
 
     if (AndVerify)
       PM->add(createVerifierPass());
   }
 
-  static inline void createStandardLTOPasses(PassManager *PM,
+  static inline void createStandardLTOPasses(PassManagerBase *PM,
                                              bool Internalize,
                                              bool RunInliner,
                                              bool VerifyEach) {
diff --git a/include/llvm/System/Signals.h b/include/llvm/System/Signals.h
index 2b9d8ca..504420c 100644
--- a/include/llvm/System/Signals.h
+++ b/include/llvm/System/Signals.h
@@ -20,6 +20,10 @@
 namespace llvm {
 namespace sys {
 
+  /// This function runs all the registered interrupt handlers, including the
+  /// removal of files registered by RemoveFileOnSignal.
+  void RunInterruptHandlers();
+
   /// This function registers signal handlers to ensure that if a signal gets
   /// delivered that the named file is removed.
   /// @brief Remove a file if a fatal signal occurs.
diff --git a/include/llvm/Target/SubtargetFeature.h b/include/llvm/Target/SubtargetFeature.h
index 38a3cc2..4546871 100644
--- a/include/llvm/Target/SubtargetFeature.h
+++ b/include/llvm/Target/SubtargetFeature.h
@@ -108,9 +108,10 @@ public:
   // Dump feature info.
   void dump() const;
 
-  /// Retrieve a formatted string of the default features for
-  /// the specified target triple.
-  static std::string getDefaultSubtargetFeatures(const Triple &Triple);
+  /// Retrieve a formatted string of the default features for the specified
+  /// target triple.
+  void getDefaultSubtargetFeatures(const std::string &CPU,
+                                   const Triple& Triple);
 };
 
 } // End namespace llvm
diff --git a/include/llvm/Target/Target.td b/include/llvm/Target/Target.td
index cc19e0d..ca551e5 100644
--- a/include/llvm/Target/Target.td
+++ b/include/llvm/Target/Target.td
@@ -21,6 +21,11 @@ include "llvm/Intrinsics.td"
 
 class RegisterClass; // Forward def
 
+// SubRegIndex - Use instances of SubRegIndex to identify subregisters.
+class SubRegIndex {
+  string Namespace = "";
+}
+
 // Register - You should define one instance of this class for each register
 // in the target machine.  String n will become the "name" of the register.
 class Register<string n> {
@@ -49,6 +54,23 @@ class Register<string n> {
   // not [AX, AH, AL].
   list<Register> SubRegs = [];
 
+  // SubRegIndices - For each register in SubRegs, specify the SubRegIndex used
+  // to address it. Sub-sub-register indices are automatically inherited from
+  // SubRegs.
+  list<SubRegIndex> SubRegIndices = [];
+
+  // CompositeIndices - Specify subreg indices that don't correspond directly to
+  // a register in SubRegs and are not inherited. The following formats are
+  // supported:
+  //
+  // (a)     Identity  - Reg:a == Reg
+  // (a b)   Alias     - Reg:a == Reg:b
+  // (a b,c) Composite - Reg:a == (Reg:b):c
+  //
+  // This can be used to disambiguate a sub-sub-register that exists in more
+  // than one subregister and other weird stuff.
+  list<dag> CompositeIndices = [];
+
   // DwarfNumbers - Numbers used internally by gcc/gdb to identify the register.
   // These values can be determined by locating the <target>.h file in the
   // directory llvmgcc/gcc/config/<target>/ and looking for REGISTER_NAMES.  The
@@ -68,17 +90,6 @@ class RegisterWithSubRegs<string n, list<Register> subregs> : Register<n> {
   let SubRegs = subregs;
 }
 
-// SubRegSet - This can be used to define a specific mapping of registers to
-// indices, for use as named subregs of a particular physical register.  Each
-// register in 'subregs' becomes an addressable subregister at index 'n' of the
-// corresponding register in 'regs'.
-class SubRegSet<int n, list<Register> regs, list<Register> subregs> {
-  int index = n;
-  
-  list<Register> From = regs;
-  list<Register> To = subregs;
-}
-
 // RegisterClass - Now that all of the registers are defined, and aliases
 // between registers are defined, specify which registers belong to which
 // register classes.  This also defines the default allocation order of
@@ -117,9 +128,9 @@ class RegisterClass<string namespace, list<ValueType> regTypes, int alignment,
   //
   list<Register> MemberList = regList;
   
-  // SubClassList - Specify which register classes correspond to subregisters
-  // of this class. The order should be by subregister set index.
-  list<RegisterClass> SubRegClassList = [];
+  // SubRegClasses - Specify the register class of subregisters as a list of
+  // dags: (RegClass SubRegIndex, SubRegindex, ...)
+  list<dag> SubRegClasses = [];
 
   // MethodProtos/MethodBodies - These members can be used to insert arbitrary
   // code into a generated register class.   The normal usage of this is to 
@@ -221,6 +232,9 @@ class Instruction {
   // purposes.
   bit isCodeGenOnly = 0;
 
+  // Is this instruction a pseudo instruction for use by the assembler parser.
+  bit isAsmParserOnly = 0;
+
   InstrItinClass Itinerary = NoItinerary;// Execution steps used for scheduling.
 
   string Constraints = "";  // OperandConstraint, e.g. $src = $dst.
@@ -296,8 +310,8 @@ class AsmOperandClass {
   /// The name to use for this class, which should be usable as an enum value.
   string Name = ?;
 
-  /// The super class of this operand.
-  AsmOperandClass SuperClass = ?;
+  /// The super classes of this operand.
+  list<AsmOperandClass> SuperClasses = [];
 
   /// The name of the method on the target specific operand to call to test
   /// whether the operand is an instance of this class. If not set, this will
@@ -331,10 +345,10 @@ class Operand<ValueType ty> {
   // in. Match classes are used to define the order in which instructions are
   // match, to ensure that which instructions gets matched is deterministic.
   //
-  // The target specific parser must be able to classify an parsed operand 
-  // into a unique class, which does not partially overlap with any other 
-  // classes. It can match a subset of some other class, in which case 
-  // ParserMatchSuperClass should be set to the name of that class.
+  // The target specific parser must be able to classify an parsed operand into
+  // a unique class, which does not partially overlap with any other classes. It
+  // can match a subset of some other class, in which case the AsmOperandClass
+  // should declare the other operand as one of its super classes.
   AsmOperandClass ParserMatchClass = ImmAsmOperand;
 }
 
diff --git a/include/llvm/Target/TargetAsmBackend.h b/include/llvm/Target/TargetAsmBackend.h
index f350ecc..979595a 100644
--- a/include/llvm/Target/TargetAsmBackend.h
+++ b/include/llvm/Target/TargetAsmBackend.h
@@ -13,10 +13,9 @@
 #include "llvm/System/DataTypes.h"
 
 namespace llvm {
-class MCAsmFixup;
 class MCDataFragment;
+class MCFixup;
 class MCInst;
-class MCInstFragment;
 class MCObjectWriter;
 class MCSection;
 template<typename T>
@@ -90,6 +89,14 @@ public:
     return false;
   }
 
+  /// isSectionAtomizable - Check whether the given section can be split into
+  /// atoms.
+  ///
+  /// \see MCAssembler::isSymbolLinkerVisible().
+  virtual bool isSectionAtomizable(const MCSection &Section) const {
+    return true;
+  }
+
   /// isVirtualSection - Check whether the given section is "virtual", that is
   /// has no actual object file contents.
   virtual bool isVirtualSection(const MCSection &Section) const = 0;
@@ -97,22 +104,22 @@ public:
   /// ApplyFixup - Apply the \arg Value for given \arg Fixup into the provided
   /// data fragment, at the offset specified by the fixup and following the
   /// fixup kind as appropriate.
-  virtual void ApplyFixup(const MCAsmFixup &Fixup, MCDataFragment &Fragment,
+  virtual void ApplyFixup(const MCFixup &Fixup, MCDataFragment &Fragment,
                           uint64_t Value) const = 0;
 
   /// MayNeedRelaxation - Check whether the given instruction may need
   /// relaxation.
   ///
-  /// \arg Inst - The instruction to test.
-  /// \arg Fixups - The actual fixups this instruction encoded to, for potential
-  /// use by the target backend.
-  virtual bool MayNeedRelaxation(const MCInst &Inst,
-                           const SmallVectorImpl<MCAsmFixup> &Fixups) const = 0;
+  /// \param Inst - The instruction to test.
+  virtual bool MayNeedRelaxation(const MCInst &Inst) const = 0;
 
   /// RelaxInstruction - Relax the instruction in the given fragment to the next
   /// wider instruction.
-  virtual void RelaxInstruction(const MCInstFragment *IF,
-                                MCInst &Res) const = 0;
+  ///
+  /// \param Inst - The instruction to relax, which may be the same as the
+  /// output.
+  /// \parm Res [output] - On return, the relaxed instruction.
+  virtual void RelaxInstruction(const MCInst &Inst, MCInst &Res) const = 0;
 
   /// WriteNopData - Write an (optimal) nop sequence of Count bytes to the given
   /// output. If the target cannot generate such a sequence, it should return an
diff --git a/include/llvm/Target/TargetInstrInfo.h b/include/llvm/Target/TargetInstrInfo.h
index 143dbcc..2e5697e 100644
--- a/include/llvm/Target/TargetInstrInfo.h
+++ b/include/llvm/Target/TargetInstrInfo.h
@@ -315,7 +315,8 @@ public:
                             MachineBasicBlock::iterator MI,
                             unsigned DestReg, unsigned SrcReg,
                             const TargetRegisterClass *DestRC,
-                            const TargetRegisterClass *SrcRC) const {
+                            const TargetRegisterClass *SrcRC,
+                            DebugLoc DL) const {
     assert(0 && "Target didn't implement TargetInstrInfo::copyRegToReg!");
     return false;
   }
@@ -328,7 +329,8 @@ public:
   virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MI,
                                    unsigned SrcReg, bool isKill, int FrameIndex,
-                                   const TargetRegisterClass *RC) const {
+                                   const TargetRegisterClass *RC,
+                                   const TargetRegisterInfo *TRI) const {
     assert(0 && "Target didn't implement TargetInstrInfo::storeRegToStackSlot!");
   }
 
@@ -339,7 +341,8 @@ public:
   virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MI,
                                     unsigned DestReg, int FrameIndex,
-                                    const TargetRegisterClass *RC) const {
+                                    const TargetRegisterClass *RC,
+                                    const TargetRegisterInfo *TRI) const {
     assert(0 && "Target didn't implement TargetInstrInfo::loadRegFromStackSlot!");
   }
   
@@ -349,7 +352,8 @@ public:
   /// storeRegToStackSlot(). Returns false otherwise.
   virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator MI,
-                                const std::vector<CalleeSavedInfo> &CSI) const {
+                                         const std::vector<CalleeSavedInfo> &CSI,
+                                         const TargetRegisterInfo *TRI) const {
     return false;
   }
 
@@ -359,7 +363,8 @@ public:
   /// Returns false otherwise.
   virtual bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
                                            MachineBasicBlock::iterator MI,
-                                const std::vector<CalleeSavedInfo> &CSI) const {
+                                        const std::vector<CalleeSavedInfo> &CSI,
+                                        const TargetRegisterInfo *TRI) const {
     return false;
   }
   
diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h
index 4ea6c94..5efebe6 100644
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@@ -55,7 +55,6 @@ namespace llvm {
   class TargetData;
   class TargetMachine;
   class TargetRegisterClass;
-  class TargetSubtarget;
   class TargetLoweringObjectFile;
   class Value;
 
@@ -98,11 +97,6 @@ public:
     ZeroOrNegativeOneBooleanContent // All bits equal to bit 0.
   };
 
-  enum SchedPreference {
-    SchedulingForLatency,          // Scheduling for shortest total latency.
-    SchedulingForRegPressure       // Scheduling for lowest register pressure.
-  };
-
   /// NOTE: The constructor takes ownership of TLOF.
   explicit TargetLowering(const TargetMachine &TM,
                           const TargetLoweringObjectFile *TLOF);
@@ -151,13 +145,20 @@ public:
   BooleanContent getBooleanContents() const { return BooleanContents;}
 
   /// getSchedulingPreference - Return target scheduling preference.
-  SchedPreference getSchedulingPreference() const {
+  Sched::Preference getSchedulingPreference() const {
     return SchedPreferenceInfo;
   }
 
+  /// getSchedulingPreference - Some scheduler, e.g. hybrid, can switch to
+  /// different scheduling heuristics for different nodes. This function returns
+  /// the preference (or none) for the given node.
+  virtual Sched::Preference getSchedulingPreference(SDNode *N) const {
+    return Sched::None;
+  }
+
   /// getRegClassFor - Return the register class that should be used for the
-  /// specified value type.  This may only be called on legal types.
-  TargetRegisterClass *getRegClassFor(EVT VT) const {
+  /// specified value type.
+  virtual TargetRegisterClass *getRegClassFor(EVT VT) const {
     assert(VT.isSimple() && "getRegClassFor called on illegal type!");
     TargetRegisterClass *RC = RegClassForVT[VT.getSimpleVT().SimpleTy];
     assert(RC && "This value type is not natively supported!");
@@ -181,11 +182,9 @@ public:
   }
 
   class ValueTypeActionImpl {
-    /// ValueTypeActions - This is a bitvector that contains two bits for each
-    /// value type, where the two bits correspond to the LegalizeAction enum.
-    /// This can be queried with "getTypeAction(VT)".
-    /// dimension by (MVT::MAX_ALLOWED_VALUETYPE/32) * 2
-    uint32_t ValueTypeActions[(MVT::MAX_ALLOWED_VALUETYPE/32)*2];
+    /// ValueTypeActions - For each value type, keep a LegalizeAction enum
+    /// that indicates how instruction selection should deal with the type.
+    uint8_t ValueTypeActions[MVT::LAST_VALUETYPE];
   public:
     ValueTypeActionImpl() {
       std::fill(ValueTypeActions, array_endof(ValueTypeActions), 0);
@@ -202,13 +201,11 @@ public:
         return Legal;
       }
       unsigned I = VT.getSimpleVT().SimpleTy;
-      assert(I<4*array_lengthof(ValueTypeActions)*sizeof(ValueTypeActions[0]));
-      return (LegalizeAction)((ValueTypeActions[I>>4] >> ((2*I) & 31)) & 3);
+      return (LegalizeAction)ValueTypeActions[I];
     }
     void setTypeAction(EVT VT, LegalizeAction Action) {
       unsigned I = VT.getSimpleVT().SimpleTy;
-      assert(I<4*array_lengthof(ValueTypeActions)*sizeof(ValueTypeActions[0]));
-      ValueTypeActions[I>>4] |= Action << ((I*2) & 31);
+      ValueTypeActions[I] = Action;
     }
   };
   
@@ -357,13 +354,9 @@ public:
   /// for it.
   LegalizeAction getOperationAction(unsigned Op, EVT VT) const {
     if (VT.isExtended()) return Expand;
-    assert(Op < array_lengthof(OpActions[0]) &&
-           (unsigned)VT.getSimpleVT().SimpleTy < sizeof(OpActions[0][0])*8 &&
-           "Table isn't big enough!");
+    assert(Op < array_lengthof(OpActions[0]) && "Table isn't big enough!");
     unsigned I = (unsigned) VT.getSimpleVT().SimpleTy;
-    unsigned J = I & 31;
-    I = I >> 5;
-    return (LegalizeAction)((OpActions[I][Op] >> (J*2) ) & 3);
+    return (LegalizeAction)OpActions[I][Op];
   }
 
   /// isOperationLegalOrCustom - Return true if the specified operation is
@@ -386,35 +379,31 @@ public:
   /// either it is legal, needs to be promoted to a larger size, needs to be
   /// expanded to some other code sequence, or the target has a custom expander
   /// for it.
-  LegalizeAction getLoadExtAction(unsigned LType, EVT VT) const {
-    assert(LType < array_lengthof(LoadExtActions) &&
-           (unsigned)VT.getSimpleVT().SimpleTy < sizeof(LoadExtActions[0])*4 &&
+  LegalizeAction getLoadExtAction(unsigned ExtType, EVT VT) const {
+    assert(ExtType < ISD::LAST_LOADEXT_TYPE &&
+           (unsigned)VT.getSimpleVT().SimpleTy < MVT::LAST_VALUETYPE &&
            "Table isn't big enough!");
-    return (LegalizeAction)((LoadExtActions[LType] >> 
-              (2*VT.getSimpleVT().SimpleTy)) & 3);
+    return (LegalizeAction)LoadExtActions[VT.getSimpleVT().SimpleTy][ExtType];
   }
 
   /// isLoadExtLegal - Return true if the specified load with extension is legal
   /// on this target.
-  bool isLoadExtLegal(unsigned LType, EVT VT) const {
+  bool isLoadExtLegal(unsigned ExtType, EVT VT) const {
     return VT.isSimple() &&
-      (getLoadExtAction(LType, VT) == Legal ||
-       getLoadExtAction(LType, VT) == Custom);
+      (getLoadExtAction(ExtType, VT) == Legal ||
+       getLoadExtAction(ExtType, VT) == Custom);
   }
 
   /// getTruncStoreAction - Return how this store with truncation should be
   /// treated: either it is legal, needs to be promoted to a larger size, needs
   /// to be expanded to some other code sequence, or the target has a custom
   /// expander for it.
-  LegalizeAction getTruncStoreAction(EVT ValVT,
-                                     EVT MemVT) const {
-    assert((unsigned)ValVT.getSimpleVT().SimpleTy <
-             array_lengthof(TruncStoreActions) &&
-           (unsigned)MemVT.getSimpleVT().SimpleTy <
-             sizeof(TruncStoreActions[0])*4 &&
+  LegalizeAction getTruncStoreAction(EVT ValVT, EVT MemVT) const {
+    assert((unsigned)ValVT.getSimpleVT().SimpleTy < MVT::LAST_VALUETYPE &&
+           (unsigned)MemVT.getSimpleVT().SimpleTy < MVT::LAST_VALUETYPE &&
            "Table isn't big enough!");
-    return (LegalizeAction)((TruncStoreActions[ValVT.getSimpleVT().SimpleTy] >>
-                             (2*MemVT.getSimpleVT().SimpleTy)) & 3);
+    return (LegalizeAction)TruncStoreActions[ValVT.getSimpleVT().SimpleTy]
+                                            [MemVT.getSimpleVT().SimpleTy];
   }
 
   /// isTruncStoreLegal - Return true if the specified store with truncation is
@@ -431,11 +420,11 @@ public:
   /// for it.
   LegalizeAction
   getIndexedLoadAction(unsigned IdxMode, EVT VT) const {
-    assert( IdxMode < array_lengthof(IndexedModeActions[0][0]) &&
+    assert( IdxMode < ISD::LAST_INDEXED_MODE &&
            ((unsigned)VT.getSimpleVT().SimpleTy) < MVT::LAST_VALUETYPE &&
            "Table isn't big enough!");
-    return (LegalizeAction)((IndexedModeActions[
-                             (unsigned)VT.getSimpleVT().SimpleTy][0][IdxMode]));
+    unsigned Ty = (unsigned)VT.getSimpleVT().SimpleTy;
+    return (LegalizeAction)((IndexedModeActions[Ty][IdxMode] & 0xf0) >> 4);
   }
 
   /// isIndexedLoadLegal - Return true if the specified indexed load is legal
@@ -452,11 +441,11 @@ public:
   /// for it.
   LegalizeAction
   getIndexedStoreAction(unsigned IdxMode, EVT VT) const {
-    assert(IdxMode < array_lengthof(IndexedModeActions[0][1]) &&
-           (unsigned)VT.getSimpleVT().SimpleTy < MVT::LAST_VALUETYPE &&
+    assert( IdxMode < ISD::LAST_INDEXED_MODE &&
+           ((unsigned)VT.getSimpleVT().SimpleTy) < MVT::LAST_VALUETYPE &&
            "Table isn't big enough!");
-    return (LegalizeAction)((IndexedModeActions[
-              (unsigned)VT.getSimpleVT().SimpleTy][1][IdxMode]));
+    unsigned Ty = (unsigned)VT.getSimpleVT().SimpleTy;
+    return (LegalizeAction)(IndexedModeActions[Ty][IdxMode] & 0x0f);
   }  
 
   /// isIndexedStoreLegal - Return true if the specified indexed load is legal
@@ -919,7 +908,7 @@ protected:
   void setBooleanContents(BooleanContent Ty) { BooleanContents = Ty; }
 
   /// setSchedulingPreference - Specify the target scheduling preference.
-  void setSchedulingPreference(SchedPreference Pref) {
+  void setSchedulingPreference(Sched::Preference Pref) {
     SchedPreferenceInfo = Pref;
   }
 
@@ -991,33 +980,28 @@ protected:
   /// with the specified type and indicate what to do about it.
   void setOperationAction(unsigned Op, MVT VT,
                           LegalizeAction Action) {
-    unsigned I = (unsigned)VT.SimpleTy;
-    unsigned J = I & 31;
-    I = I >> 5;
-    OpActions[I][Op] &= ~(uint64_t(3UL) << (J*2));
-    OpActions[I][Op] |= (uint64_t)Action << (J*2);
+    assert(Op < array_lengthof(OpActions[0]) && "Table isn't big enough!");
+    OpActions[(unsigned)VT.SimpleTy][Op] = (uint8_t)Action;
   }
   
   /// setLoadExtAction - Indicate that the specified load with extension does
   /// not work with the specified type and indicate what to do about it.
   void setLoadExtAction(unsigned ExtType, MVT VT,
-                      LegalizeAction Action) {
-    assert((unsigned)VT.SimpleTy*2 < 63 &&
-           ExtType < array_lengthof(LoadExtActions) &&
+                        LegalizeAction Action) {
+    assert(ExtType < ISD::LAST_LOADEXT_TYPE &&
+           (unsigned)VT.SimpleTy < MVT::LAST_VALUETYPE &&
            "Table isn't big enough!");
-    LoadExtActions[ExtType] &= ~(uint64_t(3UL) << VT.SimpleTy*2);
-    LoadExtActions[ExtType] |= (uint64_t)Action << VT.SimpleTy*2;
+    LoadExtActions[VT.SimpleTy][ExtType] = (uint8_t)Action;
   }
   
   /// setTruncStoreAction - Indicate that the specified truncating store does
   /// not work with the specified type and indicate what to do about it.
   void setTruncStoreAction(MVT ValVT, MVT MemVT,
                            LegalizeAction Action) {
-    assert((unsigned)ValVT.SimpleTy < array_lengthof(TruncStoreActions) &&
-           (unsigned)MemVT.SimpleTy*2 < 63 &&
+    assert((unsigned)ValVT.SimpleTy < MVT::LAST_VALUETYPE &&
+           (unsigned)MemVT.SimpleTy < MVT::LAST_VALUETYPE &&
            "Table isn't big enough!");
-    TruncStoreActions[ValVT.SimpleTy] &= ~(uint64_t(3UL)  << MemVT.SimpleTy*2);
-    TruncStoreActions[ValVT.SimpleTy] |= (uint64_t)Action << MemVT.SimpleTy*2;
+    TruncStoreActions[ValVT.SimpleTy][MemVT.SimpleTy] = (uint8_t)Action;
   }
 
   /// setIndexedLoadAction - Indicate that the specified indexed load does or
@@ -1027,9 +1011,12 @@ protected:
   void setIndexedLoadAction(unsigned IdxMode, MVT VT,
                             LegalizeAction Action) {
     assert((unsigned)VT.SimpleTy < MVT::LAST_VALUETYPE &&
-           IdxMode < array_lengthof(IndexedModeActions[0][0]) &&
+           IdxMode < ISD::LAST_INDEXED_MODE &&
+           (unsigned)Action < 0xf &&
            "Table isn't big enough!");
-    IndexedModeActions[(unsigned)VT.SimpleTy][0][IdxMode] = (uint8_t)Action;
+    // Load action are kept in the upper half.
+    IndexedModeActions[(unsigned)VT.SimpleTy][IdxMode] &= ~0xf0;
+    IndexedModeActions[(unsigned)VT.SimpleTy][IdxMode] |= ((uint8_t)Action) <<4;
   }
   
   /// setIndexedStoreAction - Indicate that the specified indexed store does or
@@ -1039,9 +1026,12 @@ protected:
   void setIndexedStoreAction(unsigned IdxMode, MVT VT,
                              LegalizeAction Action) {
     assert((unsigned)VT.SimpleTy < MVT::LAST_VALUETYPE &&
-           IdxMode < array_lengthof(IndexedModeActions[0][1] ) &&
+           IdxMode < ISD::LAST_INDEXED_MODE &&
+           (unsigned)Action < 0xf &&
            "Table isn't big enough!");
-    IndexedModeActions[(unsigned)VT.SimpleTy][1][IdxMode] = (uint8_t)Action;
+    // Store action are kept in the lower half.
+    IndexedModeActions[(unsigned)VT.SimpleTy][IdxMode] &= ~0x0f;
+    IndexedModeActions[(unsigned)VT.SimpleTy][IdxMode] |= ((uint8_t)Action);
   }
   
   /// setCondCodeAction - Indicate that the specified condition code is or isn't
@@ -1103,12 +1093,6 @@ protected:
   }
   
 public:
-
-  virtual const TargetSubtarget *getSubtarget() const {
-    assert(0 && "Not Implemented");
-    return NULL;    // this is here to silence compiler errors
-  }
-
   //===--------------------------------------------------------------------===//
   // Lowering methods - These methods must be implemented by targets so that
   // the SelectionDAGLowering code knows how to lower these.
@@ -1200,61 +1184,6 @@ public:
     return SDValue();    // this is here to silence compiler errors
   }
 
-  /// EmitTargetCodeForMemcpy - Emit target-specific code that performs a
-  /// memcpy. This can be used by targets to provide code sequences for cases
-  /// that don't fit the target's parameters for simple loads/stores and can be
-  /// more efficient than using a library call. This function can return a null
-  /// SDValue if the target declines to use custom code and a different
-  /// lowering strategy should be used.
-  /// 
-  /// If AlwaysInline is true, the size is constant and the target should not
-  /// emit any calls and is strongly encouraged to attempt to emit inline code
-  /// even if it is beyond the usual threshold because this intrinsic is being
-  /// expanded in a place where calls are not feasible (e.g. within the prologue
-  /// for another call). If the target chooses to decline an AlwaysInline
-  /// request here, legalize will resort to using simple loads and stores.
-  virtual SDValue
-  EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
-                          SDValue Chain,
-                          SDValue Op1, SDValue Op2,
-                          SDValue Op3, unsigned Align, bool isVolatile,
-                          bool AlwaysInline,
-                          const Value *DstSV, uint64_t DstOff,
-                          const Value *SrcSV, uint64_t SrcOff) const {
-    return SDValue();
-  }
-
-  /// EmitTargetCodeForMemmove - Emit target-specific code that performs a
-  /// memmove. This can be used by targets to provide code sequences for cases
-  /// that don't fit the target's parameters for simple loads/stores and can be
-  /// more efficient than using a library call. This function can return a null
-  /// SDValue if the target declines to use custom code and a different
-  /// lowering strategy should be used.
-  virtual SDValue
-  EmitTargetCodeForMemmove(SelectionDAG &DAG, DebugLoc dl,
-                           SDValue Chain,
-                           SDValue Op1, SDValue Op2,
-                           SDValue Op3, unsigned Align, bool isVolatile,
-                           const Value *DstSV, uint64_t DstOff,
-                           const Value *SrcSV, uint64_t SrcOff) const {
-    return SDValue();
-  }
-
-  /// EmitTargetCodeForMemset - Emit target-specific code that performs a
-  /// memset. This can be used by targets to provide code sequences for cases
-  /// that don't fit the target's parameters for simple stores and can be more
-  /// efficient than using a library call. This function can return a null
-  /// SDValue if the target declines to use custom code and a different
-  /// lowering strategy should be used.
-  virtual SDValue
-  EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
-                          SDValue Chain,
-                          SDValue Op1, SDValue Op2,
-                          SDValue Op3, unsigned Align, bool isVolatile,
-                          const Value *DstSV, uint64_t DstOff) const {
-    return SDValue();
-  }
-
   /// LowerOperationWrapper - This callback is invoked by the type legalizer
   /// to legalize nodes with an illegal operand type but legal result types.
   /// It replaces the LowerOperation callback in the type Legalizer.
@@ -1594,7 +1523,7 @@ private:
 
   /// SchedPreferenceInfo - The target scheduling preference: shortest possible
   /// total cycles or lowest register usage.
-  SchedPreference SchedPreferenceInfo;
+  Sched::Preference SchedPreferenceInfo;
   
   /// JumpBufSize - The size, in bytes, of the target's jmp_buf buffers
   unsigned JumpBufSize;
@@ -1653,26 +1582,24 @@ private:
   /// Most operations are Legal (aka, supported natively by the target), but
   /// operations that are not should be described.  Note that operations on
   /// non-legal value types are not described here.
-  /// This array is accessed using VT.getSimpleVT(), so it is subject to
-  /// the MVT::MAX_ALLOWED_VALUETYPE * 2 bits.
-  uint64_t OpActions[MVT::MAX_ALLOWED_VALUETYPE/(sizeof(uint64_t)*4)][ISD::BUILTIN_OP_END];
+  uint8_t OpActions[MVT::LAST_VALUETYPE][ISD::BUILTIN_OP_END];
   
-  /// LoadExtActions - For each load of load extension type and each value type,
+  /// LoadExtActions - For each load extension type and each value type,
   /// keep a LegalizeAction that indicates how instruction selection should deal
-  /// with the load.
-  uint64_t LoadExtActions[ISD::LAST_LOADEXT_TYPE];
+  /// with a load of a specific value type and extension type.
+  uint8_t LoadExtActions[MVT::LAST_VALUETYPE][ISD::LAST_LOADEXT_TYPE];
   
-  /// TruncStoreActions - For each truncating store, keep a LegalizeAction that
-  /// indicates how instruction selection should deal with the store.
-  uint64_t TruncStoreActions[MVT::LAST_VALUETYPE];
+  /// TruncStoreActions - For each value type pair keep a LegalizeAction that
+  /// indicates whether a truncating store of a specific value type and
+  /// truncating type is legal.
+  uint8_t TruncStoreActions[MVT::LAST_VALUETYPE][MVT::LAST_VALUETYPE];
 
   /// IndexedModeActions - For each indexed mode and each value type,
   /// keep a pair of LegalizeAction that indicates how instruction
-  /// selection should deal with the load / store.  The first
-  /// dimension is now the value_type for the reference.  The second
-  /// dimension is the load [0] vs. store[1].  The third dimension
-  /// represents the various modes for load store.
-  uint8_t IndexedModeActions[MVT::LAST_VALUETYPE][2][ISD::LAST_INDEXED_MODE];
+  /// selection should deal with the load / store.  The first dimension is the
+  /// value_type for the reference. The second dimension represents the various
+  /// modes for load store.
+  uint8_t IndexedModeActions[MVT::LAST_VALUETYPE][ISD::LAST_INDEXED_MODE];
   
   /// CondCodeActions - For each condition code (ISD::CondCode) keep a
   /// LegalizeAction that indicates how instruction selection should
diff --git a/include/llvm/Target/TargetLoweringObjectFile.h b/include/llvm/Target/TargetLoweringObjectFile.h
index 6c99598..819709f 100644
--- a/include/llvm/Target/TargetLoweringObjectFile.h
+++ b/include/llvm/Target/TargetLoweringObjectFile.h
@@ -88,6 +88,10 @@ protected:
   const MCSection *DwarfRangesSection;
   const MCSection *DwarfMacroInfoSection;
   
+  // Extra TLS Variable Data section.  If the target needs to put additional
+  // information for a TLS variable, it'll go here.
+  const MCSection *TLSExtraDataSection;
+  
   /// SupportsWeakEmptyEHFrame - True if target object file supports a
   /// weak_definition of constant 0 for an omitted EH frame.
   bool SupportsWeakOmittedEHFrame;
@@ -147,6 +151,9 @@ public:
   const MCSection *getDwarfMacroInfoSection() const {
     return DwarfMacroInfoSection;
   }
+  const MCSection *getTLSExtraDataSection() const {
+    return TLSExtraDataSection;
+  }
   
   /// shouldEmitUsedDirectiveFor - This hook allows targets to selectively
   /// decide not to emit the UsedDirective for some symbols in llvm.used.
diff --git a/include/llvm/Target/TargetMachine.h b/include/llvm/Target/TargetMachine.h
index c734cf4..227499b 100644
--- a/include/llvm/Target/TargetMachine.h
+++ b/include/llvm/Target/TargetMachine.h
@@ -70,6 +70,15 @@ namespace CodeGenOpt {
   };
 }
 
+namespace Sched {
+  enum Preference {
+    None,             // No preference
+    Latency,          // Scheduling for shortest total latency.
+    RegPressure,      // Scheduling for lowest register pressure.
+    Hybrid            // Scheduling for both latency and register pressure.
+  };
+}
+
 //===----------------------------------------------------------------------===//
 ///
 /// TargetMachine - Primary interface to the complete machine description for
@@ -92,7 +101,9 @@ protected: // Can only create subclasses.
   /// AsmInfo - Contains target specific asm information.
   ///
   const MCAsmInfo *AsmInfo;
-  
+
+  unsigned MCRelaxAll : 1;
+
 public:
   virtual ~TargetMachine();
 
@@ -149,6 +160,14 @@ public:
   /// 
   virtual const TargetELFWriterInfo *getELFWriterInfo() const { return 0; }
 
+  /// hasMCRelaxAll - Check whether all machine code instructions should be
+  /// relaxed.
+  bool hasMCRelaxAll() const { return MCRelaxAll; }
+
+  /// setMCRelaxAll - Set whether all machine code instructions should be
+  /// relaxed.
+  void setMCRelaxAll(bool Value) { MCRelaxAll = Value; }
+
   /// getRelocationModel - Returns the code generation relocation model. The
   /// choices are static, PIC, and dynamic-no-pic, and target default.
   static Reloc::Model getRelocationModel();
@@ -225,17 +244,6 @@ public:
                                           bool = true) {
     return true;
   }
-
-  /// addPassesToEmitWholeFile - This method can be implemented by targets that 
-  /// require having the entire module at once.  This is not recommended, do not
-  /// use this.
-  virtual bool WantsWholeFile() const { return false; }
-  virtual bool addPassesToEmitWholeFile(PassManager &, formatted_raw_ostream &,
-                                        CodeGenFileType,
-                                        CodeGenOpt::Level,
-                                        bool = true) {
-    return true;
-  }
 };
 
 /// LLVMTargetMachine - This class describes a target machine that is
diff --git a/include/llvm/Target/TargetRegisterInfo.h b/include/llvm/Target/TargetRegisterInfo.h
index 29b862a..7c37b73 100644
--- a/include/llvm/Target/TargetRegisterInfo.h
+++ b/include/llvm/Target/TargetRegisterInfo.h
@@ -28,6 +28,7 @@ class BitVector;
 class MachineFunction;
 class MachineMove;
 class RegScavenger;
+template<class T> class SmallVectorImpl;
 
 /// TargetRegisterDesc - This record contains all of the information known about
 /// a particular register.  The AliasSet field (if not null) contains a pointer
@@ -151,9 +152,6 @@ public:
   /// index SubIdx, or NULL if no such class exists.
   const TargetRegisterClass* getSubRegisterRegClass(unsigned SubIdx) const {
     assert(SubIdx>0 && "Invalid subregister index");
-    for (unsigned s = 0; s != SubIdx-1; ++s)
-      if (!SubRegClasses[s])
-        return NULL;
     return SubRegClasses[SubIdx-1];
   }
 
@@ -262,14 +260,13 @@ class TargetRegisterInfo {
 protected:
   const unsigned* SubregHash;
   const unsigned SubregHashSize;
-  const unsigned* SuperregHash;
-  const unsigned SuperregHashSize;
   const unsigned* AliasesHash;
   const unsigned AliasesHashSize;
 public:
   typedef const TargetRegisterClass * const * regclass_iterator;
 private:
   const TargetRegisterDesc *Desc;             // Pointer to the descriptor array
+  const char *const *SubRegIndexNames;        // Names of subreg indexes.
   unsigned NumRegs;                           // Number of entries in the array
 
   regclass_iterator RegClassBegin, RegClassEnd;   // List of regclasses
@@ -280,12 +277,11 @@ protected:
   TargetRegisterInfo(const TargetRegisterDesc *D, unsigned NR,
                      regclass_iterator RegClassBegin,
                      regclass_iterator RegClassEnd,
+                     const char *const *subregindexnames,
                      int CallFrameSetupOpcode = -1,
                      int CallFrameDestroyOpcode = -1,
                      const unsigned* subregs = 0,
                      const unsigned subregsize = 0,
-                     const unsigned* superregs = 0,
-                     const unsigned superregsize = 0,
                      const unsigned* aliases = 0,
                      const unsigned aliasessize = 0);
   virtual ~TargetRegisterInfo();
@@ -380,6 +376,13 @@ public:
     return NumRegs;
   }
 
+  /// getSubRegIndexName - Return the human-readable symbolic target-specific
+  /// name for the specified SubRegIndex.
+  const char *getSubRegIndexName(unsigned SubIdx) const {
+    assert(SubIdx && "This is not a subregister index");
+    return SubRegIndexNames[SubIdx-1];
+  }
+
   /// regsOverlap - Returns true if the two registers are equal or alias each
   /// other. The registers may be virtual register.
   bool regsOverlap(unsigned regA, unsigned regB) const {
@@ -425,19 +428,7 @@ public:
   /// isSuperRegister - Returns true if regB is a super-register of regA.
   ///
   bool isSuperRegister(unsigned regA, unsigned regB) const {
-    // SuperregHash is a simple quadratically probed hash table.
-    size_t index = (regA + regB * 37) & (SuperregHashSize-1);
-    unsigned ProbeAmt = 2;
-    while (SuperregHash[index*2] != 0 &&
-           SuperregHash[index*2+1] != 0) {
-      if (SuperregHash[index*2] == regA && SuperregHash[index*2+1] == regB)
-        return true;
-
-      index = (index + ProbeAmt) & (SuperregHashSize-1);
-      ProbeAmt += 2;
-    }
-
-    return false;
+    return isSubRegister(regB, regA);
   }
 
   /// getCalleeSavedRegs - Return a null-terminated list of all of the
@@ -479,6 +470,17 @@ public:
     return 0;
   }
 
+  /// canCombinedSubRegIndex - Given a register class and a list of sub-register
+  /// indices, return true if it's possible to combine the sub-register indices
+  /// into one that corresponds to a larger sub-register. Return the new sub-
+  /// register index by reference. Note the new index by be zero if the given
+  /// sub-registers combined to form the whole register.
+  virtual bool canCombinedSubRegIndex(const TargetRegisterClass *RC,
+                                      SmallVectorImpl<unsigned> &SubIndices,
+                                      unsigned &NewSubIdx) const {
+    return 0;
+  }
+
   /// getMatchingSuperRegClass - Return a subclass of the specified register
   /// class A so that each register in it has a sub-register of the
   /// specified sub-register index which is in the specified register class B.
diff --git a/include/llvm/Target/TargetRegistry.h b/include/llvm/Target/TargetRegistry.h
index 36bbe00..1418bee 100644
--- a/include/llvm/Target/TargetRegistry.h
+++ b/include/llvm/Target/TargetRegistry.h
@@ -73,6 +73,13 @@ namespace llvm {
     typedef MCCodeEmitter *(*CodeEmitterCtorTy)(const Target &T,
                                                 TargetMachine &TM,
                                                 MCContext &Ctx);
+    typedef MCStreamer *(*ObjectStreamerCtorTy)(const Target &T,
+                                                const std::string &TT,
+                                                MCContext &Ctx,
+                                                TargetAsmBackend &TAB,
+                                                raw_ostream &_OS,
+                                                MCCodeEmitter *_Emitter,
+                                                bool RelaxAll);
 
   private:
     /// Next - The next registered target in the linked list, maintained by the
@@ -126,6 +133,10 @@ namespace llvm {
     /// if registered.
     CodeEmitterCtorTy CodeEmitterCtorFn;
 
+    /// ObjectStreamerCtorFn - Construction function for this target's
+    /// ObjectStreamer, if registered.
+    ObjectStreamerCtorTy ObjectStreamerCtorFn;
+
   public:
     /// @name Target Information
     /// @{
@@ -170,6 +181,9 @@ namespace llvm {
     /// hasCodeEmitter - Check if this target supports instruction encoding.
     bool hasCodeEmitter() const { return CodeEmitterCtorFn != 0; }
 
+    /// hasObjectStreamer - Check if this target supports streaming to files.
+    bool hasObjectStreamer() const { return ObjectStreamerCtorFn != 0; }
+
     /// @}
     /// @name Feature Constructors
     /// @{
@@ -258,6 +272,24 @@ namespace llvm {
       return CodeEmitterCtorFn(*this, TM, Ctx);
     }
 
+    /// createObjectStreamer - Create a target specific MCStreamer.
+    ///
+    /// \arg TT - The target triple.
+    /// \arg Ctx - The target context.
+    /// \arg TAB - The target assembler backend object.
+    /// \arg _OS - The stream object.
+    /// \arg _Emitter - The target independent assembler object.
+    /// \arg RelaxAll - Relax all fixups?
+    MCStreamer *createObjectStreamer(const std::string &TT, MCContext &Ctx,
+                                     TargetAsmBackend &TAB,
+                                     raw_ostream &_OS,
+                                     MCCodeEmitter *_Emitter,
+                                     bool RelaxAll) const {
+      if (!ObjectStreamerCtorFn)
+        return 0;
+      return ObjectStreamerCtorFn(*this, TT, Ctx, TAB, _OS, _Emitter, RelaxAll);
+    }
+
     /// @}
   };
 
@@ -479,6 +511,20 @@ namespace llvm {
         T.CodeEmitterCtorFn = Fn;
     }
 
+    /// RegisterObjectStreamer - Register an MCStreamer implementation
+    /// for the given target.
+    ///
+    /// Clients are responsible for ensuring that registration doesn't occur
+    /// while another thread is attempting to access the registry. Typically
+    /// this is done by initializing all targets at program startup.
+    ///
+    /// @param T - The target being registered.
+    /// @param Fn - A function to construct an MCStreamer for the target.
+    static void RegisterObjectStreamer(Target &T, Target::ObjectStreamerCtorTy Fn) {
+      if (!T.ObjectStreamerCtorFn)
+        T.ObjectStreamerCtorFn = Fn;
+    }
+
     /// @}
   };
 
diff --git a/include/llvm/Target/TargetSelectionDAGInfo.h b/include/llvm/Target/TargetSelectionDAGInfo.h
index 943bdea..2be1834 100644
--- a/include/llvm/Target/TargetSelectionDAGInfo.h
+++ b/include/llvm/Target/TargetSelectionDAGInfo.h
@@ -16,19 +16,84 @@
 #ifndef LLVM_TARGET_TARGETSELECTIONDAGINFO_H
 #define LLVM_TARGET_TARGETSELECTIONDAGINFO_H
 
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+
 namespace llvm {
 
+class TargetData;
+class TargetMachine;
+
 //===----------------------------------------------------------------------===//
-/// TargetSelectionDAGLowering - Targets can subclass this to parameterize the
+/// TargetSelectionDAGInfo - Targets can subclass this to parameterize the
 /// SelectionDAG lowering and instruction selection process.
 ///
 class TargetSelectionDAGInfo {
   TargetSelectionDAGInfo(const TargetSelectionDAGInfo &); // DO NOT IMPLEMENT
   void operator=(const TargetSelectionDAGInfo &);         // DO NOT IMPLEMENT
 
+  const TargetData *TD;
+
+protected:
+  const TargetData *getTargetData() const { return TD; }
+
 public:
-  TargetSelectionDAGInfo();
+  explicit TargetSelectionDAGInfo(const TargetMachine &TM);
   virtual ~TargetSelectionDAGInfo();
+
+  /// EmitTargetCodeForMemcpy - Emit target-specific code that performs a
+  /// memcpy. This can be used by targets to provide code sequences for cases
+  /// that don't fit the target's parameters for simple loads/stores and can be
+  /// more efficient than using a library call. This function can return a null
+  /// SDValue if the target declines to use custom code and a different
+  /// lowering strategy should be used.
+  /// 
+  /// If AlwaysInline is true, the size is constant and the target should not
+  /// emit any calls and is strongly encouraged to attempt to emit inline code
+  /// even if it is beyond the usual threshold because this intrinsic is being
+  /// expanded in a place where calls are not feasible (e.g. within the prologue
+  /// for another call). If the target chooses to decline an AlwaysInline
+  /// request here, legalize will resort to using simple loads and stores.
+  virtual SDValue
+  EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
+                          SDValue Chain,
+                          SDValue Op1, SDValue Op2,
+                          SDValue Op3, unsigned Align, bool isVolatile,
+                          bool AlwaysInline,
+                          const Value *DstSV, uint64_t DstOff,
+                          const Value *SrcSV, uint64_t SrcOff) const {
+    return SDValue();
+  }
+
+  /// EmitTargetCodeForMemmove - Emit target-specific code that performs a
+  /// memmove. This can be used by targets to provide code sequences for cases
+  /// that don't fit the target's parameters for simple loads/stores and can be
+  /// more efficient than using a library call. This function can return a null
+  /// SDValue if the target declines to use custom code and a different
+  /// lowering strategy should be used.
+  virtual SDValue
+  EmitTargetCodeForMemmove(SelectionDAG &DAG, DebugLoc dl,
+                           SDValue Chain,
+                           SDValue Op1, SDValue Op2,
+                           SDValue Op3, unsigned Align, bool isVolatile,
+                           const Value *DstSV, uint64_t DstOff,
+                           const Value *SrcSV, uint64_t SrcOff) const {
+    return SDValue();
+  }
+
+  /// EmitTargetCodeForMemset - Emit target-specific code that performs a
+  /// memset. This can be used by targets to provide code sequences for cases
+  /// that don't fit the target's parameters for simple stores and can be more
+  /// efficient than using a library call. This function can return a null
+  /// SDValue if the target declines to use custom code and a different
+  /// lowering strategy should be used.
+  virtual SDValue
+  EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
+                          SDValue Chain,
+                          SDValue Op1, SDValue Op2,
+                          SDValue Op3, unsigned Align, bool isVolatile,
+                          const Value *DstSV, uint64_t DstOff) const {
+    return SDValue();
+  }
 };
 
 } // end llvm namespace
diff --git a/include/llvm/Transforms/Scalar.h b/include/llvm/Transforms/Scalar.h
index a8c9c6a..0d338b5 100644
--- a/include/llvm/Transforms/Scalar.h
+++ b/include/llvm/Transforms/Scalar.h
@@ -332,6 +332,12 @@ FunctionPass *createGEPSplitterPass();
 //
 FunctionPass *createABCDPass();
 
+//===----------------------------------------------------------------------===//
+//
+// Sink - Code Sinking
+//
+FunctionPass *createSinkingPass();
+
 } // End llvm namespace
 
 #endif
diff --git a/include/llvm/Transforms/Utils/SSAUpdater.h b/include/llvm/Transforms/Utils/SSAUpdater.h
index 5b77ed6..ca98466 100644
--- a/include/llvm/Transforms/Utils/SSAUpdater.h
+++ b/include/llvm/Transforms/Utils/SSAUpdater.h
@@ -19,8 +19,8 @@ namespace llvm {
   class BasicBlock;
   class Use;
   class PHINode;
-  template<typename T>
-  class SmallVectorImpl;
+  template<typename T> class SmallVectorImpl;
+  template<typename T> class SSAUpdaterTraits;
   class BumpPtrAllocator;
 
 /// SSAUpdater - This class updates SSA form for a set of values defined in
@@ -28,9 +28,7 @@ namespace llvm {
 /// transformation wants to rewrite a set of uses of one value with uses of a
 /// set of values.
 class SSAUpdater {
-public:
-  class BBInfo;
-  typedef SmallVectorImpl<BBInfo*> BlockListTy;
+  friend class SSAUpdaterTraits<SSAUpdater>;
 
 private:
   /// AvailableVals - This keeps track of which value to use on a per-block
@@ -42,14 +40,10 @@ private:
   /// and a type for PHI nodes.
   Value *PrototypeValue;
 
-  /// BBMap - The GetValueAtEndOfBlock method maintains this mapping from
-  /// basic blocks to BBInfo structures.
-  /// typedef DenseMap<BasicBlock*, BBInfo*> BBMapTy;
-  void *BM;
-
   /// InsertedPHIs - If this is non-null, the SSAUpdater adds all PHI nodes that
   /// it creates to the vector.
   SmallVectorImpl<PHINode*> *InsertedPHIs;
+
 public:
   /// SSAUpdater constructor.  If InsertedPHIs is specified, it will be filled
   /// in with all PHI Nodes created by rewriting.
@@ -102,14 +96,6 @@ public:
 
 private:
   Value *GetValueAtEndOfBlockInternal(BasicBlock *BB);
-  void BuildBlockList(BasicBlock *BB, BlockListTy *BlockList,
-                      BumpPtrAllocator *Allocator);
-  void FindDominators(BlockListTy *BlockList);
-  void FindPHIPlacement(BlockListTy *BlockList);
-  void FindAvailableVals(BlockListTy *BlockList);
-  void FindExistingPHI(BasicBlock *BB, BlockListTy *BlockList);
-  bool CheckIfPHIMatches(PHINode *PHI);
-  void RecordMatchingPHI(PHINode *PHI);
 
   void operator=(const SSAUpdater&); // DO NOT IMPLEMENT
   SSAUpdater(const SSAUpdater&);     // DO NOT IMPLEMENT
diff --git a/include/llvm/Transforms/Utils/SSAUpdaterImpl.h b/include/llvm/Transforms/Utils/SSAUpdaterImpl.h
new file mode 100644
index 0000000..5a03d22
--- /dev/null
+++ b/include/llvm/Transforms/Utils/SSAUpdaterImpl.h
@@ -0,0 +1,469 @@
+//===-- SSAUpdaterImpl.h - SSA Updater Implementation -----------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides a template that implements the core algorithm for the
+// SSAUpdater and MachineSSAUpdater.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_UTILS_SSAUPDATERIMPL_H
+#define LLVM_TRANSFORMS_UTILS_SSAUPDATERIMPL_H
+
+namespace llvm {
+
+template<typename T> class SSAUpdaterTraits;
+
+template<typename UpdaterT>
+class SSAUpdaterImpl {
+private:
+  UpdaterT *Updater;
+
+  typedef SSAUpdaterTraits<UpdaterT> Traits;
+  typedef typename Traits::BlkT BlkT;
+  typedef typename Traits::ValT ValT;
+  typedef typename Traits::PhiT PhiT;
+
+  /// BBInfo - Per-basic block information used internally by SSAUpdaterImpl.
+  /// The predecessors of each block are cached here since pred_iterator is
+  /// slow and we need to iterate over the blocks at least a few times.
+  class BBInfo {
+  public:
+    BlkT *BB;          // Back-pointer to the corresponding block.
+    ValT AvailableVal; // Value to use in this block.
+    BBInfo *DefBB;     // Block that defines the available value.
+    int BlkNum;        // Postorder number.
+    BBInfo *IDom;      // Immediate dominator.
+    unsigned NumPreds; // Number of predecessor blocks.
+    BBInfo **Preds;    // Array[NumPreds] of predecessor blocks.
+    PhiT *PHITag;      // Marker for existing PHIs that match.
+
+    BBInfo(BlkT *ThisBB, ValT V)
+      : BB(ThisBB), AvailableVal(V), DefBB(V ? this : 0), BlkNum(0), IDom(0),
+      NumPreds(0), Preds(0), PHITag(0) { }
+  };
+
+  typedef DenseMap<BlkT*, ValT> AvailableValsTy;
+  AvailableValsTy *AvailableVals;
+
+  SmallVectorImpl<PhiT*> *InsertedPHIs;
+
+  typedef SmallVectorImpl<BBInfo*> BlockListTy;
+  typedef DenseMap<BlkT*, BBInfo*> BBMapTy;
+  BBMapTy BBMap;
+  BumpPtrAllocator Allocator;
+
+public:
+  explicit SSAUpdaterImpl(UpdaterT *U, AvailableValsTy *A,
+                          SmallVectorImpl<PhiT*> *Ins) :
+    Updater(U), AvailableVals(A), InsertedPHIs(Ins) { }
+
+  /// GetValue - Check to see if AvailableVals has an entry for the specified
+  /// BB and if so, return it.  If not, construct SSA form by first
+  /// calculating the required placement of PHIs and then inserting new PHIs
+  /// where needed.
+  ValT GetValue(BlkT *BB) {
+    SmallVector<BBInfo*, 100> BlockList;
+    BBInfo *PseudoEntry = BuildBlockList(BB, &BlockList);
+
+    // Special case: bail out if BB is unreachable.
+    if (BlockList.size() == 0) {
+      ValT V = Traits::GetUndefVal(BB, Updater);
+      (*AvailableVals)[BB] = V;
+      return V;
+    }
+
+    FindDominators(&BlockList, PseudoEntry);
+    FindPHIPlacement(&BlockList);
+    FindAvailableVals(&BlockList);
+
+    return BBMap[BB]->DefBB->AvailableVal;
+  }
+
+  /// BuildBlockList - Starting from the specified basic block, traverse back
+  /// through its predecessors until reaching blocks with known values.
+  /// Create BBInfo structures for the blocks and append them to the block
+  /// list.
+  BBInfo *BuildBlockList(BlkT *BB, BlockListTy *BlockList) {
+    SmallVector<BBInfo*, 10> RootList;
+    SmallVector<BBInfo*, 64> WorkList;
+
+    BBInfo *Info = new (Allocator) BBInfo(BB, 0);
+    BBMap[BB] = Info;
+    WorkList.push_back(Info);
+
+    // Search backward from BB, creating BBInfos along the way and stopping
+    // when reaching blocks that define the value.  Record those defining
+    // blocks on the RootList.
+    SmallVector<BlkT*, 10> Preds;
+    while (!WorkList.empty()) {
+      Info = WorkList.pop_back_val();
+      Preds.clear();
+      Traits::FindPredecessorBlocks(Info->BB, &Preds);
+      Info->NumPreds = Preds.size();
+      if (Info->NumPreds == 0)
+        Info->Preds = 0;
+      else
+        Info->Preds = static_cast<BBInfo**>
+          (Allocator.Allocate(Info->NumPreds * sizeof(BBInfo*),
+                              AlignOf<BBInfo*>::Alignment));
+
+      for (unsigned p = 0; p != Info->NumPreds; ++p) {
+        BlkT *Pred = Preds[p];
+        // Check if BBMap already has a BBInfo for the predecessor block.
+        typename BBMapTy::value_type &BBMapBucket =
+          BBMap.FindAndConstruct(Pred);
+        if (BBMapBucket.second) {
+          Info->Preds[p] = BBMapBucket.second;
+          continue;
+        }
+
+        // Create a new BBInfo for the predecessor.
+        ValT PredVal = AvailableVals->lookup(Pred);
+        BBInfo *PredInfo = new (Allocator) BBInfo(Pred, PredVal);
+        BBMapBucket.second = PredInfo;
+        Info->Preds[p] = PredInfo;
+
+        if (PredInfo->AvailableVal) {
+          RootList.push_back(PredInfo);
+          continue;
+        }
+        WorkList.push_back(PredInfo);
+      }
+    }
+
+    // Now that we know what blocks are backwards-reachable from the starting
+    // block, do a forward depth-first traversal to assign postorder numbers
+    // to those blocks.
+    BBInfo *PseudoEntry = new (Allocator) BBInfo(0, 0);
+    unsigned BlkNum = 1;
+
+    // Initialize the worklist with the roots from the backward traversal.
+    while (!RootList.empty()) {
+      Info = RootList.pop_back_val();
+      Info->IDom = PseudoEntry;
+      Info->BlkNum = -1;
+      WorkList.push_back(Info);
+    }
+
+    while (!WorkList.empty()) {
+      Info = WorkList.back();
+
+      if (Info->BlkNum == -2) {
+        // All the successors have been handled; assign the postorder number.
+        Info->BlkNum = BlkNum++;
+        // If not a root, put it on the BlockList.
+        if (!Info->AvailableVal)
+          BlockList->push_back(Info);
+        WorkList.pop_back();
+        continue;
+      }
+
+      // Leave this entry on the worklist, but set its BlkNum to mark that its
+      // successors have been put on the worklist.  When it returns to the top
+      // the list, after handling its successors, it will be assigned a
+      // number.
+      Info->BlkNum = -2;
+
+      // Add unvisited successors to the work list.
+      for (typename Traits::BlkSucc_iterator SI =
+             Traits::BlkSucc_begin(Info->BB),
+             E = Traits::BlkSucc_end(Info->BB); SI != E; ++SI) {
+        BBInfo *SuccInfo = BBMap[*SI];
+        if (!SuccInfo || SuccInfo->BlkNum)
+          continue;
+        SuccInfo->BlkNum = -1;
+        WorkList.push_back(SuccInfo);
+      }
+    }
+    PseudoEntry->BlkNum = BlkNum;
+    return PseudoEntry;
+  }
+
+  /// IntersectDominators - This is the dataflow lattice "meet" operation for
+  /// finding dominators.  Given two basic blocks, it walks up the dominator
+  /// tree until it finds a common dominator of both.  It uses the postorder
+  /// number of the blocks to determine how to do that.
+  BBInfo *IntersectDominators(BBInfo *Blk1, BBInfo *Blk2) {
+    while (Blk1 != Blk2) {
+      while (Blk1->BlkNum < Blk2->BlkNum) {
+        Blk1 = Blk1->IDom;
+        if (!Blk1)
+          return Blk2;
+      }
+      while (Blk2->BlkNum < Blk1->BlkNum) {
+        Blk2 = Blk2->IDom;
+        if (!Blk2)
+          return Blk1;
+      }
+    }
+    return Blk1;
+  }
+
+  /// FindDominators - Calculate the dominator tree for the subset of the CFG
+  /// corresponding to the basic blocks on the BlockList.  This uses the
+  /// algorithm from: "A Simple, Fast Dominance Algorithm" by Cooper, Harvey
+  /// and Kennedy, published in Software--Practice and Experience, 2001,
+  /// 4:1-10.  Because the CFG subset does not include any edges leading into
+  /// blocks that define the value, the results are not the usual dominator
+  /// tree.  The CFG subset has a single pseudo-entry node with edges to a set
+  /// of root nodes for blocks that define the value.  The dominators for this
+  /// subset CFG are not the standard dominators but they are adequate for
+  /// placing PHIs within the subset CFG.
+  void FindDominators(BlockListTy *BlockList, BBInfo *PseudoEntry) {
+    bool Changed;
+    do {
+      Changed = false;
+      // Iterate over the list in reverse order, i.e., forward on CFG edges.
+      for (typename BlockListTy::reverse_iterator I = BlockList->rbegin(),
+             E = BlockList->rend(); I != E; ++I) {
+        BBInfo *Info = *I;
+        BBInfo *NewIDom = 0;
+
+        // Iterate through the block's predecessors.
+        for (unsigned p = 0; p != Info->NumPreds; ++p) {
+          BBInfo *Pred = Info->Preds[p];
+
+          // Treat an unreachable predecessor as a definition with 'undef'.
+          if (Pred->BlkNum == 0) {
+            Pred->AvailableVal = Traits::GetUndefVal(Pred->BB, Updater);
+            (*AvailableVals)[Pred->BB] = Pred->AvailableVal;
+            Pred->DefBB = Pred;
+            Pred->BlkNum = PseudoEntry->BlkNum;
+            PseudoEntry->BlkNum++;
+          }
+
+          if (!NewIDom)
+            NewIDom = Pred;
+          else
+            NewIDom = IntersectDominators(NewIDom, Pred);
+        }
+
+        // Check if the IDom value has changed.
+        if (NewIDom && NewIDom != Info->IDom) {
+          Info->IDom = NewIDom;
+          Changed = true;
+        }
+      }
+    } while (Changed);
+  }
+
+  /// IsDefInDomFrontier - Search up the dominator tree from Pred to IDom for
+  /// any blocks containing definitions of the value.  If one is found, then
+  /// the successor of Pred is in the dominance frontier for the definition,
+  /// and this function returns true.
+  bool IsDefInDomFrontier(const BBInfo *Pred, const BBInfo *IDom) {
+    for (; Pred != IDom; Pred = Pred->IDom) {
+      if (Pred->DefBB == Pred)
+        return true;
+    }
+    return false;
+  }
+
+  /// FindPHIPlacement - PHIs are needed in the iterated dominance frontiers
+  /// of the known definitions.  Iteratively add PHIs in the dom frontiers
+  /// until nothing changes.  Along the way, keep track of the nearest
+  /// dominating definitions for non-PHI blocks.
+  void FindPHIPlacement(BlockListTy *BlockList) {
+    bool Changed;
+    do {
+      Changed = false;
+      // Iterate over the list in reverse order, i.e., forward on CFG edges.
+      for (typename BlockListTy::reverse_iterator I = BlockList->rbegin(),
+             E = BlockList->rend(); I != E; ++I) {
+        BBInfo *Info = *I;
+
+        // If this block already needs a PHI, there is nothing to do here.
+        if (Info->DefBB == Info)
+          continue;
+
+        // Default to use the same def as the immediate dominator.
+        BBInfo *NewDefBB = Info->IDom->DefBB;
+        for (unsigned p = 0; p != Info->NumPreds; ++p) {
+          if (IsDefInDomFrontier(Info->Preds[p], Info->IDom)) {
+            // Need a PHI here.
+            NewDefBB = Info;
+            break;
+          }
+        }
+
+        // Check if anything changed.
+        if (NewDefBB != Info->DefBB) {
+          Info->DefBB = NewDefBB;
+          Changed = true;
+        }
+      }
+    } while (Changed);
+  }
+
+  /// FindAvailableVal - If this block requires a PHI, first check if an
+  /// existing PHI matches the PHI placement and reaching definitions computed
+  /// earlier, and if not, create a new PHI.  Visit all the block's
+  /// predecessors to calculate the available value for each one and fill in
+  /// the incoming values for a new PHI.
+  void FindAvailableVals(BlockListTy *BlockList) {
+    // Go through the worklist in forward order (i.e., backward through the CFG)
+    // and check if existing PHIs can be used.  If not, create empty PHIs where
+    // they are needed.
+    for (typename BlockListTy::iterator I = BlockList->begin(),
+           E = BlockList->end(); I != E; ++I) {
+      BBInfo *Info = *I;
+      // Check if there needs to be a PHI in BB.
+      if (Info->DefBB != Info)
+        continue;
+
+      // Look for an existing PHI.
+      FindExistingPHI(Info->BB, BlockList);
+      if (Info->AvailableVal)
+        continue;
+
+      ValT PHI = Traits::CreateEmptyPHI(Info->BB, Info->NumPreds, Updater);
+      Info->AvailableVal = PHI;
+      (*AvailableVals)[Info->BB] = PHI;
+    }
+
+    // Now go back through the worklist in reverse order to fill in the
+    // arguments for any new PHIs added in the forward traversal.
+    for (typename BlockListTy::reverse_iterator I = BlockList->rbegin(),
+           E = BlockList->rend(); I != E; ++I) {
+      BBInfo *Info = *I;
+
+      if (Info->DefBB != Info) {
+        // Record the available value at join nodes to speed up subsequent
+        // uses of this SSAUpdater for the same value.
+        if (Info->NumPreds > 1)
+          (*AvailableVals)[Info->BB] = Info->DefBB->AvailableVal;
+        continue;
+      }
+
+      // Check if this block contains a newly added PHI.
+      PhiT *PHI = Traits::ValueIsNewPHI(Info->AvailableVal, Updater);
+      if (!PHI)
+        continue;
+
+      // Iterate through the block's predecessors.
+      for (unsigned p = 0; p != Info->NumPreds; ++p) {
+        BBInfo *PredInfo = Info->Preds[p];
+        BlkT *Pred = PredInfo->BB;
+        // Skip to the nearest preceding definition.
+        if (PredInfo->DefBB != PredInfo)
+          PredInfo = PredInfo->DefBB;
+        Traits::AddPHIOperand(PHI, PredInfo->AvailableVal, Pred);
+      }
+
+      DEBUG(dbgs() << "  Inserted PHI: " << *PHI << "\n");
+
+      // If the client wants to know about all new instructions, tell it.
+      if (InsertedPHIs) InsertedPHIs->push_back(PHI);
+    }
+  }
+
+  /// FindExistingPHI - Look through the PHI nodes in a block to see if any of
+  /// them match what is needed.
+  void FindExistingPHI(BlkT *BB, BlockListTy *BlockList) {
+    for (typename BlkT::iterator BBI = BB->begin(), BBE = BB->end();
+         BBI != BBE; ++BBI) {
+      PhiT *SomePHI = Traits::InstrIsPHI(BBI);
+      if (!SomePHI)
+        break;
+      if (CheckIfPHIMatches(SomePHI)) {
+        RecordMatchingPHI(SomePHI);
+        break;
+      }
+      // Match failed: clear all the PHITag values.
+      for (typename BlockListTy::iterator I = BlockList->begin(),
+             E = BlockList->end(); I != E; ++I)
+        (*I)->PHITag = 0;
+    }
+  }
+
+  /// CheckIfPHIMatches - Check if a PHI node matches the placement and values
+  /// in the BBMap.
+  bool CheckIfPHIMatches(PhiT *PHI) {
+    SmallVector<PhiT*, 20> WorkList;
+    WorkList.push_back(PHI);
+
+    // Mark that the block containing this PHI has been visited.
+    BBMap[PHI->getParent()]->PHITag = PHI;
+
+    while (!WorkList.empty()) {
+      PHI = WorkList.pop_back_val();
+
+      // Iterate through the PHI's incoming values.
+      for (typename Traits::PHI_iterator I = Traits::PHI_begin(PHI),
+             E = Traits::PHI_end(PHI); I != E; ++I) {
+        ValT IncomingVal = I.getIncomingValue();
+        BBInfo *PredInfo = BBMap[I.getIncomingBlock()];
+        // Skip to the nearest preceding definition.
+        if (PredInfo->DefBB != PredInfo)
+          PredInfo = PredInfo->DefBB;
+
+        // Check if it matches the expected value.
+        if (PredInfo->AvailableVal) {
+          if (IncomingVal == PredInfo->AvailableVal)
+            continue;
+          return false;
+        }
+
+        // Check if the value is a PHI in the correct block.
+        PhiT *IncomingPHIVal = Traits::ValueIsPHI(IncomingVal, Updater);
+        if (!IncomingPHIVal || IncomingPHIVal->getParent() != PredInfo->BB)
+          return false;
+
+        // If this block has already been visited, check if this PHI matches.
+        if (PredInfo->PHITag) {
+          if (IncomingPHIVal == PredInfo->PHITag)
+            continue;
+          return false;
+        }
+        PredInfo->PHITag = IncomingPHIVal;
+
+        WorkList.push_back(IncomingPHIVal);
+      }
+    }
+    return true;
+  }
+
+  /// RecordMatchingPHI - For a PHI node that matches, record it and its input
+  /// PHIs in both the BBMap and the AvailableVals mapping.
+  void RecordMatchingPHI(PhiT *PHI) {
+    SmallVector<PhiT*, 20> WorkList;
+    WorkList.push_back(PHI);
+
+    // Record this PHI.
+    BlkT *BB = PHI->getParent();
+    ValT PHIVal = Traits::GetPHIValue(PHI);
+    (*AvailableVals)[BB] = PHIVal;
+    BBMap[BB]->AvailableVal = PHIVal;
+
+    while (!WorkList.empty()) {
+      PHI = WorkList.pop_back_val();
+
+      // Iterate through the PHI's incoming values.
+      for (typename Traits::PHI_iterator I = Traits::PHI_begin(PHI),
+             E = Traits::PHI_end(PHI); I != E; ++I) {
+        ValT IncomingVal = I.getIncomingValue();
+        PhiT *IncomingPHI = Traits::ValueIsPHI(IncomingVal, Updater);
+        if (!IncomingPHI) continue;
+        BB = IncomingPHI->getParent();
+        BBInfo *Info = BBMap[BB];
+        if (!Info || Info->AvailableVal)
+          continue;
+
+        // Record the PHI and add it to the worklist.
+        (*AvailableVals)[BB] = IncomingVal;
+        Info->AvailableVal = IncomingVal;
+        WorkList.push_back(IncomingPHI);
+      }
+    }
+  }
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt
index ad05dd9..5a37ce0 100644
--- a/lib/Analysis/CMakeLists.txt
+++ b/lib/Analysis/CMakeLists.txt
@@ -28,6 +28,7 @@ add_llvm_library(LLVMAnalysis
   LoopPass.cpp
   MemoryBuiltins.cpp
   MemoryDependenceAnalysis.cpp
+  ModuleDebugInfoPrinter.cpp
   PHITransAddr.cpp
   PointerTracking.cpp
   PostDominators.cpp
diff --git a/lib/Analysis/DebugInfo.cpp b/lib/Analysis/DebugInfo.cpp
index 141b181..a7b6d2b 100644
--- a/lib/Analysis/DebugInfo.cpp
+++ b/lib/Analysis/DebugInfo.cpp
@@ -32,42 +32,6 @@ using namespace llvm::dwarf;
 // DIDescriptor
 //===----------------------------------------------------------------------===//
 
-/// ValidDebugInfo - Return true if V represents valid debug info value.
-/// FIXME : Add DIDescriptor.isValid()
-bool DIDescriptor::ValidDebugInfo(MDNode *N, unsigned OptLevel) {
-  if (!N)
-    return false;
-
-  DIDescriptor DI(N);
-
-  // Check current version. Allow Version7 for now.
-  unsigned Version = DI.getVersion();
-  if (Version != LLVMDebugVersion && Version != LLVMDebugVersion7)
-    return false;
-
-  switch (DI.getTag()) {
-  case DW_TAG_variable:
-    assert(DIVariable(N).Verify() && "Invalid DebugInfo value");
-    break;
-  case DW_TAG_compile_unit:
-    assert(DICompileUnit(N).Verify() && "Invalid DebugInfo value");
-    break;
-  case DW_TAG_subprogram:
-    assert(DISubprogram(N).Verify() && "Invalid DebugInfo value");
-    break;
-  case DW_TAG_lexical_block:
-    // FIXME: This interfers with the quality of generated code during
-    // optimization.
-    if (OptLevel != CodeGenOpt::None)
-      return false;
-    // FALLTHROUGH
-  default:
-    break;
-  }
-
-  return true;
-}
-
 StringRef 
 DIDescriptor::getStringField(unsigned Elt) const {
   if (DbgNode == 0)
@@ -96,7 +60,7 @@ DIDescriptor DIDescriptor::getDescriptorField(unsigned Elt) const {
     return DIDescriptor();
 
   if (Elt < DbgNode->getNumOperands())
-    return DIDescriptor(dyn_cast_or_null<MDNode>(DbgNode->getOperand(Elt)));
+    return DIDescriptor(dyn_cast_or_null<const MDNode>(DbgNode->getOperand(Elt)));
   return DIDescriptor();
 }
 
@@ -246,7 +210,7 @@ bool DIDescriptor::isEnumerator() const {
 // Simple Descriptor Constructors and other Methods
 //===----------------------------------------------------------------------===//
 
-DIType::DIType(MDNode *N) : DIScope(N) {
+DIType::DIType(const MDNode *N) : DIScope(N) {
   if (!N) return;
   if (!isBasicType() && !isDerivedType() && !isCompositeType()) {
     DbgNode = 0;
@@ -271,9 +235,11 @@ void DIDerivedType::replaceAllUsesWith(DIDescriptor &D) {
   // which, due to uniquing, has merged with the source. We shield clients from
   // this detail by allowing a value to be replaced with replaceAllUsesWith()
   // itself.
-  if (getNode() != D.getNode()) {
-    MDNode *Node = DbgNode;
-    Node->replaceAllUsesWith(D.getNode());
+  if (DbgNode != D) {
+    MDNode *Node = const_cast<MDNode*>(DbgNode);
+    const MDNode *DN = D;
+    const Value *V = cast_or_null<Value>(DN);
+    Node->replaceAllUsesWith(const_cast<Value*>(V));
     Node->destroy();
   }
 }
@@ -366,6 +332,9 @@ bool DIVariable::Verify() const {
   if (!getContext().Verify())
     return false;
 
+  if (!getCompileUnit().Verify())
+    return false;
+
   DIType Ty = getType();
   if (!Ty.Verify())
     return false;
@@ -381,6 +350,17 @@ bool DILocation::Verify() const {
   return DbgNode->getNumOperands() == 4;
 }
 
+/// Verify - Verify that a namespace descriptor is well formed.
+bool DINameSpace::Verify() const {
+  if (!DbgNode)
+    return false;
+  if (getName().empty())
+    return false;
+  if (!getCompileUnit().Verify())
+    return false;
+  return true;
+}
+
 /// getOriginalTypeSize - If this type is derived from a base type then
 /// return base type size.
 uint64_t DIDerivedType::getOriginalTypeSize() const {
@@ -394,7 +374,7 @@ uint64_t DIDerivedType::getOriginalTypeSize() const {
     if (!BaseType.isValid())
       return getSizeInBits();
     if (BaseType.isDerivedType())
-      return DIDerivedType(BaseType.getNode()).getOriginalTypeSize();
+      return DIDerivedType(BaseType).getOriginalTypeSize();
     else
       return BaseType.getSizeInBits();
   }
@@ -410,7 +390,7 @@ bool DIVariable::isInlinedFnArgument(const Function *CurFn) {
     return false;
   // This variable is not inlined function argument if its scope 
   // does not describe current function.
-  return !(DISubprogram(getContext().getNode()).describes(CurFn));
+  return !(DISubprogram(getContext()).describes(CurFn));
 }
 
 /// describes - Return true if this subprogram provides debugging
@@ -475,144 +455,182 @@ StringRef DIScope::getDirectory() const {
 //===----------------------------------------------------------------------===//
 
 
-/// dump - Print descriptor.
-void DIDescriptor::dump() const {
-  dbgs() << "[" << dwarf::TagString(getTag()) << "] ";
-  dbgs().write_hex((intptr_t) &*DbgNode) << ']';
+/// print - Print descriptor.
+void DIDescriptor::print(raw_ostream &OS) const {
+  OS << "[" << dwarf::TagString(getTag()) << "] ";
+  OS.write_hex((intptr_t) &*DbgNode) << ']';
 }
 
-/// dump - Print compile unit.
-void DICompileUnit::dump() const {
+/// print - Print compile unit.
+void DICompileUnit::print(raw_ostream &OS) const {
   if (getLanguage())
-    dbgs() << " [" << dwarf::LanguageString(getLanguage()) << "] ";
+    OS << " [" << dwarf::LanguageString(getLanguage()) << "] ";
 
-  dbgs() << " [" << getDirectory() << "/" << getFilename() << " ]";
+  OS << " [" << getDirectory() << "/" << getFilename() << "]";
 }
 
-/// dump - Print type.
-void DIType::dump() const {
+/// print - Print type.
+void DIType::print(raw_ostream &OS) const {
   if (!DbgNode) return;
 
   StringRef Res = getName();
   if (!Res.empty())
-    dbgs() << " [" << Res << "] ";
+    OS << " [" << Res << "] ";
 
   unsigned Tag = getTag();
-  dbgs() << " [" << dwarf::TagString(Tag) << "] ";
+  OS << " [" << dwarf::TagString(Tag) << "] ";
 
   // TODO : Print context
-  getCompileUnit().dump();
-  dbgs() << " ["
-         << getLineNumber() << ", "
-         << getSizeInBits() << ", "
-         << getAlignInBits() << ", "
-         << getOffsetInBits()
+  getCompileUnit().print(OS);
+  OS << " ["
+         << "line " << getLineNumber() << ", "
+         << getSizeInBits() << " bits, "
+         << getAlignInBits() << " bit alignment, "
+         << getOffsetInBits() << " bit offset"
          << "] ";
 
   if (isPrivate())
-    dbgs() << " [private] ";
+    OS << " [private] ";
   else if (isProtected())
-    dbgs() << " [protected] ";
+    OS << " [protected] ";
 
   if (isForwardDecl())
-    dbgs() << " [fwd] ";
+    OS << " [fwd] ";
 
   if (isBasicType())
-    DIBasicType(DbgNode).dump();
+    DIBasicType(DbgNode).print(OS);
   else if (isDerivedType())
-    DIDerivedType(DbgNode).dump();
+    DIDerivedType(DbgNode).print(OS);
   else if (isCompositeType())
-    DICompositeType(DbgNode).dump();
+    DICompositeType(DbgNode).print(OS);
   else {
-    dbgs() << "Invalid DIType\n";
+    OS << "Invalid DIType\n";
     return;
   }
 
-  dbgs() << "\n";
+  OS << "\n";
 }
 
-/// dump - Print basic type.
-void DIBasicType::dump() const {
-  dbgs() << " [" << dwarf::AttributeEncodingString(getEncoding()) << "] ";
+/// print - Print basic type.
+void DIBasicType::print(raw_ostream &OS) const {
+  OS << " [" << dwarf::AttributeEncodingString(getEncoding()) << "] ";
 }
 
-/// dump - Print derived type.
-void DIDerivedType::dump() const {
-  dbgs() << "\n\t Derived From: "; getTypeDerivedFrom().dump();
+/// print - Print derived type.
+void DIDerivedType::print(raw_ostream &OS) const {
+  OS << "\n\t Derived From: "; getTypeDerivedFrom().print(OS);
 }
 
-/// dump - Print composite type.
-void DICompositeType::dump() const {
+/// print - Print composite type.
+void DICompositeType::print(raw_ostream &OS) const {
   DIArray A = getTypeArray();
-  dbgs() << " [" << A.getNumElements() << " elements]";
+  OS << " [" << A.getNumElements() << " elements]";
 }
 
-/// dump - Print global.
-void DIGlobal::dump() const {
+/// print - Print subprogram.
+void DISubprogram::print(raw_ostream &OS) const {
   StringRef Res = getName();
   if (!Res.empty())
-    dbgs() << " [" << Res << "] ";
+    OS << " [" << Res << "] ";
 
   unsigned Tag = getTag();
-  dbgs() << " [" << dwarf::TagString(Tag) << "] ";
+  OS << " [" << dwarf::TagString(Tag) << "] ";
 
   // TODO : Print context
-  getCompileUnit().dump();
-  dbgs() << " [" << getLineNumber() << "] ";
+  getCompileUnit().print(OS);
+  OS << " [" << getLineNumber() << "] ";
 
   if (isLocalToUnit())
-    dbgs() << " [local] ";
+    OS << " [local] ";
 
   if (isDefinition())
-    dbgs() << " [def] ";
+    OS << " [def] ";
 
-  if (isGlobalVariable())
-    DIGlobalVariable(DbgNode).dump();
-
-  dbgs() << "\n";
+  OS << "\n";
 }
 
-/// dump - Print subprogram.
-void DISubprogram::dump() const {
+/// print - Print global variable.
+void DIGlobalVariable::print(raw_ostream &OS) const {
+  OS << " [";
   StringRef Res = getName();
   if (!Res.empty())
-    dbgs() << " [" << Res << "] ";
+    OS << " [" << Res << "] ";
 
   unsigned Tag = getTag();
-  dbgs() << " [" << dwarf::TagString(Tag) << "] ";
+  OS << " [" << dwarf::TagString(Tag) << "] ";
 
   // TODO : Print context
-  getCompileUnit().dump();
-  dbgs() << " [" << getLineNumber() << "] ";
+  getCompileUnit().print(OS);
+  OS << " [" << getLineNumber() << "] ";
 
   if (isLocalToUnit())
-    dbgs() << " [local] ";
+    OS << " [local] ";
 
   if (isDefinition())
-    dbgs() << " [def] ";
+    OS << " [def] ";
+
+  if (isGlobalVariable())
+    DIGlobalVariable(DbgNode).print(OS);
+  OS << "]\n";
+}
+
+/// print - Print variable.
+void DIVariable::print(raw_ostream &OS) const {
+  StringRef Res = getName();
+  if (!Res.empty())
+    OS << " [" << Res << "] ";
+
+  getCompileUnit().print(OS);
+  OS << " [" << getLineNumber() << "] ";
+  getType().print(OS);
+  OS << "\n";
+
+  // FIXME: Dump complex addresses
+}
+
+/// dump - Print descriptor to dbgs() with a newline.
+void DIDescriptor::dump() const {
+  print(dbgs()); dbgs() << '\n';
+}
+
+/// dump - Print compile unit to dbgs() with a newline.
+void DICompileUnit::dump() const {
+  print(dbgs()); dbgs() << '\n';
+}
+
+/// dump - Print type to dbgs() with a newline.
+void DIType::dump() const {
+  print(dbgs()); dbgs() << '\n';
+}
 
-  dbgs() << "\n";
+/// dump - Print basic type to dbgs() with a newline.
+void DIBasicType::dump() const {
+  print(dbgs()); dbgs() << '\n';
+}
+
+/// dump - Print derived type to dbgs() with a newline.
+void DIDerivedType::dump() const {
+  print(dbgs()); dbgs() << '\n';
+}
+
+/// dump - Print composite type to dbgs() with a newline.
+void DICompositeType::dump() const {
+  print(dbgs()); dbgs() << '\n';
+}
+
+/// dump - Print subprogram to dbgs() with a newline.
+void DISubprogram::dump() const {
+  print(dbgs()); dbgs() << '\n';
 }
 
 /// dump - Print global variable.
 void DIGlobalVariable::dump() const {
-  dbgs() << " [";
-  getGlobal()->dump();
-  dbgs() << "] ";
+  print(dbgs()); dbgs() << '\n';
 }
 
 /// dump - Print variable.
 void DIVariable::dump() const {
-  StringRef Res = getName();
-  if (!Res.empty())
-    dbgs() << " [" << Res << "] ";
-
-  getCompileUnit().dump();
-  dbgs() << " [" << getLineNumber() << "] ";
-  getType().dump();
-  dbgs() << "\n";
-
-  // FIXME: Dump complex addresses
+  print(dbgs()); dbgs() << '\n';
 }
 
 //===----------------------------------------------------------------------===//
@@ -641,7 +659,7 @@ DIArray DIFactory::GetOrCreateArray(DIDescriptor *Tys, unsigned NumTys) {
     Elts.push_back(llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)));
   else
     for (unsigned i = 0; i != NumTys; ++i)
-      Elts.push_back(Tys[i].getNode());
+      Elts.push_back(Tys[i]);
 
   return DIArray(MDNode::get(VMContext,Elts.data(), Elts.size()));
 }
@@ -694,7 +712,7 @@ DIFile DIFactory::CreateFile(StringRef Filename,
     GetTagConstant(dwarf::DW_TAG_file_type),
     MDString::get(VMContext, Filename),
     MDString::get(VMContext, Directory),
-    CU.getNode()
+    CU
   };
 
   return DIFile(MDNode::get(VMContext, &Elts[0], 4));
@@ -722,9 +740,9 @@ DIBasicType DIFactory::CreateBasicType(DIDescriptor Context,
                                        unsigned Encoding) {
   Value *Elts[] = {
     GetTagConstant(dwarf::DW_TAG_base_type),
-    Context.getNode(),
+    Context,
     MDString::get(VMContext, Name),
-    F.getNode(),
+    F,
     ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
     ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
     ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
@@ -747,9 +765,9 @@ DIBasicType DIFactory::CreateBasicTypeEx(DIDescriptor Context,
                                          unsigned Encoding) {
   Value *Elts[] = {
     GetTagConstant(dwarf::DW_TAG_base_type),
-    Context.getNode(),
+    Context,
     MDString::get(VMContext, Name),
-    F.getNode(),
+    F,
     ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
     SizeInBits,
     AlignInBits,
@@ -766,7 +784,7 @@ DIType DIFactory::CreateArtificialType(DIType Ty) {
     return Ty;
 
   SmallVector<Value *, 9> Elts;
-  MDNode *N = Ty.getNode();
+  MDNode *N = Ty;
   assert (N && "Unexpected input DIType!");
   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
     if (Value *V = N->getOperand(i))
@@ -798,15 +816,15 @@ DIDerivedType DIFactory::CreateDerivedType(unsigned Tag,
                                            DIType DerivedFrom) {
   Value *Elts[] = {
     GetTagConstant(Tag),
-    Context.getNode(),
+    Context,
     MDString::get(VMContext, Name),
-    F.getNode(),
+    F,
     ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
     ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
     ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
     ConstantInt::get(Type::getInt64Ty(VMContext), OffsetInBits),
     ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
-    DerivedFrom.getNode(),
+    DerivedFrom,
   };
   return DIDerivedType(MDNode::get(VMContext, &Elts[0], 10));
 }
@@ -826,15 +844,15 @@ DIDerivedType DIFactory::CreateDerivedTypeEx(unsigned Tag,
                                              DIType DerivedFrom) {
   Value *Elts[] = {
     GetTagConstant(Tag),
-    Context.getNode(),
+    Context,
     MDString::get(VMContext, Name),
-    F.getNode(),
+    F,
     ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
     SizeInBits,
     AlignInBits,
     OffsetInBits,
     ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
-    DerivedFrom.getNode(),
+    DerivedFrom,
   };
   return DIDerivedType(MDNode::get(VMContext, &Elts[0], 10));
 }
@@ -857,16 +875,16 @@ DICompositeType DIFactory::CreateCompositeType(unsigned Tag,
 
   Value *Elts[] = {
     GetTagConstant(Tag),
-    Context.getNode(),
+    Context,
     MDString::get(VMContext, Name),
-    F.getNode(),
+    F,
     ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
     ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
     ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
     ConstantInt::get(Type::getInt64Ty(VMContext), OffsetInBits),
     ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
-    DerivedFrom.getNode(),
-    Elements.getNode(),
+    DerivedFrom,
+    Elements,
     ConstantInt::get(Type::getInt32Ty(VMContext), RuntimeLang),
     ContainingType
   };
@@ -890,16 +908,16 @@ DICompositeType DIFactory::CreateCompositeTypeEx(unsigned Tag,
 
   Value *Elts[] = {
     GetTagConstant(Tag),
-    Context.getNode(),
+    Context,
     MDString::get(VMContext, Name),
-    F.getNode(),
+    F,
     ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
     SizeInBits,
     AlignInBits,
     OffsetInBits,
     ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
-    DerivedFrom.getNode(),
-    Elements.getNode(),
+    DerivedFrom,
+    Elements,
     ConstantInt::get(Type::getInt32Ty(VMContext), RuntimeLang)
   };
   return DICompositeType(MDNode::get(VMContext, &Elts[0], 12));
@@ -925,18 +943,18 @@ DISubprogram DIFactory::CreateSubprogram(DIDescriptor Context,
   Value *Elts[] = {
     GetTagConstant(dwarf::DW_TAG_subprogram),
     llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
-    Context.getNode(),
+    Context,
     MDString::get(VMContext, Name),
     MDString::get(VMContext, DisplayName),
     MDString::get(VMContext, LinkageName),
-    F.getNode(),
+    F,
     ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
-    Ty.getNode(),
+    Ty,
     ConstantInt::get(Type::getInt1Ty(VMContext), isLocalToUnit),
     ConstantInt::get(Type::getInt1Ty(VMContext), isDefinition),
     ConstantInt::get(Type::getInt32Ty(VMContext), (unsigned)VK),
     ConstantInt::get(Type::getInt32Ty(VMContext), VIndex),
-    ContainingType.getNode(),
+    ContainingType,
     ConstantInt::get(Type::getInt1Ty(VMContext), isArtificial),
     ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized)
   };
@@ -947,9 +965,9 @@ DISubprogram DIFactory::CreateSubprogram(DIDescriptor Context,
 /// given declaration. 
 DISubprogram DIFactory::CreateSubprogramDefinition(DISubprogram &SPDeclaration) {
   if (SPDeclaration.isDefinition())
-    return DISubprogram(SPDeclaration.getNode());
+    return DISubprogram(SPDeclaration);
 
-  MDNode *DeclNode = SPDeclaration.getNode();
+  MDNode *DeclNode = SPDeclaration;
   Value *Elts[] = {
     GetTagConstant(dwarf::DW_TAG_subprogram),
     llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
@@ -982,13 +1000,13 @@ DIFactory::CreateGlobalVariable(DIDescriptor Context, StringRef Name,
   Value *Elts[] = {
     GetTagConstant(dwarf::DW_TAG_variable),
     llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
-    Context.getNode(),
+    Context,
     MDString::get(VMContext, Name),
     MDString::get(VMContext, DisplayName),
     MDString::get(VMContext, LinkageName),
-    F.getNode(),
+    F,
     ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
-    Ty.getNode(),
+    Ty,
     ConstantInt::get(Type::getInt1Ty(VMContext), isLocalToUnit),
     ConstantInt::get(Type::getInt1Ty(VMContext), isDefinition),
     Val
@@ -1010,16 +1028,24 @@ DIVariable DIFactory::CreateVariable(unsigned Tag, DIDescriptor Context,
                                      StringRef Name,
                                      DIFile F,
                                      unsigned LineNo,
-                                     DIType Ty) {
+                                     DIType Ty, bool AlwaysPreserve) {
   Value *Elts[] = {
     GetTagConstant(Tag),
-    Context.getNode(),
+    Context,
     MDString::get(VMContext, Name),
-    F.getNode(),
+    F,
     ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
-    Ty.getNode(),
+    Ty,
   };
-  return DIVariable(MDNode::get(VMContext, &Elts[0], 6));
+  MDNode *Node = MDNode::get(VMContext, &Elts[0], 6);
+  if (AlwaysPreserve) {
+    // The optimizer may remove local variable. If there is an interest
+    // to preserve variable info in such situation then stash it in a
+    // named mdnode.
+    NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.lv");
+    NMD->addOperand(Node);
+  }
+  return DIVariable(Node);
 }
 
 
@@ -1033,11 +1059,11 @@ DIVariable DIFactory::CreateComplexVariable(unsigned Tag, DIDescriptor Context,
                                             SmallVector<Value *, 9> &addr) {
   SmallVector<Value *, 9> Elts;
   Elts.push_back(GetTagConstant(Tag));
-  Elts.push_back(Context.getNode());
+  Elts.push_back(Context);
   Elts.push_back(MDString::get(VMContext, Name));
-  Elts.push_back(F.getNode());
+  Elts.push_back(F);
   Elts.push_back(ConstantInt::get(Type::getInt32Ty(VMContext), LineNo));
-  Elts.push_back(Ty.getNode());
+  Elts.push_back(Ty);
   Elts.insert(Elts.end(), addr.begin(), addr.end());
 
   return DIVariable(MDNode::get(VMContext, &Elts[0], 6+addr.size()));
@@ -1050,7 +1076,7 @@ DILexicalBlock DIFactory::CreateLexicalBlock(DIDescriptor Context,
                                              unsigned LineNo, unsigned Col) {
   Value *Elts[] = {
     GetTagConstant(dwarf::DW_TAG_lexical_block),
-    Context.getNode(),
+    Context,
     ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
     ConstantInt::get(Type::getInt32Ty(VMContext), Col)
   };
@@ -1064,9 +1090,9 @@ DINameSpace DIFactory::CreateNameSpace(DIDescriptor Context, StringRef Name,
                                        unsigned LineNo) {
   Value *Elts[] = {
     GetTagConstant(dwarf::DW_TAG_namespace),
-    Context.getNode(),
+    Context,
     MDString::get(VMContext, Name),
-    F.getNode(),
+    F,
     ConstantInt::get(Type::getInt32Ty(VMContext), LineNo)
   };
   return DINameSpace(MDNode::get(VMContext, &Elts[0], 5));
@@ -1078,8 +1104,8 @@ DILocation DIFactory::CreateLocation(unsigned LineNo, unsigned ColumnNo,
   Value *Elts[] = {
     ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
     ConstantInt::get(Type::getInt32Ty(VMContext), ColumnNo),
-    S.getNode(),
-    OrigLoc.getNode(),
+    S,
+    OrigLoc,
   };
   return DILocation(MDNode::get(VMContext, &Elts[0], 4));
 }
@@ -1090,7 +1116,7 @@ DILocation DIFactory::CreateLocation(unsigned LineNo, unsigned ColumnNo,
  Value *Elts[] = {
     ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
     ConstantInt::get(Type::getInt32Ty(VMContext), ColumnNo),
-    S.getNode(),
+    S,
     OrigLoc
   };
   return DILocation(MDNode::get(VMContext, &Elts[0], 4));
@@ -1104,12 +1130,12 @@ DILocation DIFactory::CreateLocation(unsigned LineNo, unsigned ColumnNo,
 Instruction *DIFactory::InsertDeclare(Value *Storage, DIVariable D,
                                       Instruction *InsertBefore) {
   assert(Storage && "no storage passed to dbg.declare");
-  assert(D.getNode() && "empty DIVariable passed to dbg.declare");
+  assert(D.Verify() && "empty DIVariable passed to dbg.declare");
   if (!DeclareFn)
     DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare);
 
   Value *Args[] = { MDNode::get(Storage->getContext(), &Storage, 1),
-                    D.getNode() };
+                    D };
   return CallInst::Create(DeclareFn, Args, Args+2, "", InsertBefore);
 }
 
@@ -1117,12 +1143,12 @@ Instruction *DIFactory::InsertDeclare(Value *Storage, DIVariable D,
 Instruction *DIFactory::InsertDeclare(Value *Storage, DIVariable D,
                                       BasicBlock *InsertAtEnd) {
   assert(Storage && "no storage passed to dbg.declare");
-  assert(D.getNode() && "empty DIVariable passed to dbg.declare");
+  assert(D.Verify() && "invalid DIVariable passed to dbg.declare");
   if (!DeclareFn)
     DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare);
 
   Value *Args[] = { MDNode::get(Storage->getContext(), &Storage, 1),
-                    D.getNode() };
+                    D };
 
   // If this block already has a terminator then insert this intrinsic
   // before the terminator.
@@ -1136,13 +1162,13 @@ Instruction *DIFactory::InsertDbgValueIntrinsic(Value *V, uint64_t Offset,
                                                 DIVariable D,
                                                 Instruction *InsertBefore) {
   assert(V && "no value passed to dbg.value");
-  assert(D.getNode() && "empty DIVariable passed to dbg.value");
+  assert(D.Verify() && "invalid DIVariable passed to dbg.value");
   if (!ValueFn)
     ValueFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_value);
 
   Value *Args[] = { MDNode::get(V->getContext(), &V, 1),
                     ConstantInt::get(Type::getInt64Ty(V->getContext()), Offset),
-                    D.getNode() };
+                    D };
   return CallInst::Create(ValueFn, Args, Args+3, "", InsertBefore);
 }
 
@@ -1151,13 +1177,13 @@ Instruction *DIFactory::InsertDbgValueIntrinsic(Value *V, uint64_t Offset,
                                                 DIVariable D,
                                                 BasicBlock *InsertAtEnd) {
   assert(V && "no value passed to dbg.value");
-  assert(D.getNode() && "empty DIVariable passed to dbg.value");
+  assert(D.Verify() && "invalid DIVariable passed to dbg.value");
   if (!ValueFn)
     ValueFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_value);
 
   Value *Args[] = { MDNode::get(V->getContext(), &V, 1), 
                     ConstantInt::get(Type::getInt64Ty(V->getContext()), Offset),
-                    D.getNode() };
+                    D };
   return CallInst::Create(ValueFn, Args, Args+3, "", InsertAtEnd);
 }
 
@@ -1182,11 +1208,11 @@ void DebugInfoFinder::processModule(Module &M) {
         DIDescriptor Scope(Loc.getScope(Ctx));
         
         if (Scope.isCompileUnit())
-          addCompileUnit(DICompileUnit(Scope.getNode()));
+          addCompileUnit(DICompileUnit(Scope));
         else if (Scope.isSubprogram())
-          processSubprogram(DISubprogram(Scope.getNode()));
+          processSubprogram(DISubprogram(Scope));
         else if (Scope.isLexicalBlock())
-          processLexicalBlock(DILexicalBlock(Scope.getNode()));
+          processLexicalBlock(DILexicalBlock(Scope));
         
         if (MDNode *IA = Loc.getInlinedAt(Ctx))
           processLocation(DILocation(IA));
@@ -1208,13 +1234,13 @@ void DebugInfoFinder::processModule(Module &M) {
 /// processLocation - Process DILocation.
 void DebugInfoFinder::processLocation(DILocation Loc) {
   if (!Loc.Verify()) return;
-  DIDescriptor S(Loc.getScope().getNode());
+  DIDescriptor S(Loc.getScope());
   if (S.isCompileUnit())
-    addCompileUnit(DICompileUnit(S.getNode()));
+    addCompileUnit(DICompileUnit(S));
   else if (S.isSubprogram())
-    processSubprogram(DISubprogram(S.getNode()));
+    processSubprogram(DISubprogram(S));
   else if (S.isLexicalBlock())
-    processLexicalBlock(DILexicalBlock(S.getNode()));
+    processLexicalBlock(DILexicalBlock(S));
   processLocation(Loc.getOrigLocation());
 }
 
@@ -1225,18 +1251,18 @@ void DebugInfoFinder::processType(DIType DT) {
 
   addCompileUnit(DT.getCompileUnit());
   if (DT.isCompositeType()) {
-    DICompositeType DCT(DT.getNode());
+    DICompositeType DCT(DT);
     processType(DCT.getTypeDerivedFrom());
     DIArray DA = DCT.getTypeArray();
     for (unsigned i = 0, e = DA.getNumElements(); i != e; ++i) {
       DIDescriptor D = DA.getElement(i);
       if (D.isType())
-        processType(DIType(D.getNode()));
+        processType(DIType(D));
       else if (D.isSubprogram())
-        processSubprogram(DISubprogram(D.getNode()));
+        processSubprogram(DISubprogram(D));
     }
   } else if (DT.isDerivedType()) {
-    DIDerivedType DDT(DT.getNode());
+    DIDerivedType DDT(DT);
     processType(DDT.getTypeDerivedFrom());
   }
 }
@@ -1245,9 +1271,9 @@ void DebugInfoFinder::processType(DIType DT) {
 void DebugInfoFinder::processLexicalBlock(DILexicalBlock LB) {
   DIScope Context = LB.getContext();
   if (Context.isLexicalBlock())
-    return processLexicalBlock(DILexicalBlock(Context.getNode()));
+    return processLexicalBlock(DILexicalBlock(Context));
   else
-    return processSubprogram(DISubprogram(Context.getNode()));
+    return processSubprogram(DISubprogram(Context));
 }
 
 /// processSubprogram - Process DISubprogram.
@@ -1267,7 +1293,7 @@ void DebugInfoFinder::processDeclare(DbgDeclareInst *DDI) {
   if (!DV.isVariable())
     return;
 
-  if (!NodesSeen.insert(DV.getNode()))
+  if (!NodesSeen.insert(DV))
     return;
 
   addCompileUnit(DIVariable(N).getCompileUnit());
@@ -1279,10 +1305,10 @@ bool DebugInfoFinder::addType(DIType DT) {
   if (!DT.isValid())
     return false;
 
-  if (!NodesSeen.insert(DT.getNode()))
+  if (!NodesSeen.insert(DT))
     return false;
 
-  TYs.push_back(DT.getNode());
+  TYs.push_back(DT);
   return true;
 }
 
@@ -1291,34 +1317,34 @@ bool DebugInfoFinder::addCompileUnit(DICompileUnit CU) {
   if (!CU.Verify())
     return false;
 
-  if (!NodesSeen.insert(CU.getNode()))
+  if (!NodesSeen.insert(CU))
     return false;
 
-  CUs.push_back(CU.getNode());
+  CUs.push_back(CU);
   return true;
 }
 
 /// addGlobalVariable - Add global variable into GVs.
 bool DebugInfoFinder::addGlobalVariable(DIGlobalVariable DIG) {
-  if (!DIDescriptor(DIG.getNode()).isGlobalVariable())
+  if (!DIDescriptor(DIG).isGlobalVariable())
     return false;
 
-  if (!NodesSeen.insert(DIG.getNode()))
+  if (!NodesSeen.insert(DIG))
     return false;
 
-  GVs.push_back(DIG.getNode());
+  GVs.push_back(DIG);
   return true;
 }
 
 // addSubprogram - Add subprgoram into SPs.
 bool DebugInfoFinder::addSubprogram(DISubprogram SP) {
-  if (!DIDescriptor(SP.getNode()).isSubprogram())
+  if (!DIDescriptor(SP).isSubprogram())
     return false;
 
-  if (!NodesSeen.insert(SP.getNode()))
+  if (!NodesSeen.insert(SP))
     return false;
 
-  SPs.push_back(SP.getNode());
+  SPs.push_back(SP);
   return true;
 }
 
@@ -1333,8 +1359,8 @@ static Value *findDbgGlobalDeclare(GlobalVariable *V) {
     DIDescriptor DIG(cast_or_null<MDNode>(NMD->getOperand(i)));
     if (!DIG.isGlobalVariable())
       continue;
-    if (DIGlobalVariable(DIG.getNode()).getGlobal() == V)
-      return DIG.getNode();
+    if (DIGlobalVariable(DIG).getGlobal() == V)
+      return DIG;
   }
   return 0;
 }
@@ -1406,13 +1432,13 @@ bool llvm::getLocationInfo(const Value *V, std::string &DisplayName,
 }
 
 /// getDISubprogram - Find subprogram that is enclosing this scope.
-DISubprogram llvm::getDISubprogram(MDNode *Scope) {
+DISubprogram llvm::getDISubprogram(const MDNode *Scope) {
   DIDescriptor D(Scope);
   if (D.isSubprogram())
     return DISubprogram(Scope);
   
   if (D.isLexicalBlock())
-    return getDISubprogram(DILexicalBlock(Scope).getContext().getNode());
+    return getDISubprogram(DILexicalBlock(Scope).getContext());
   
   return DISubprogram();
 }
@@ -1420,10 +1446,10 @@ DISubprogram llvm::getDISubprogram(MDNode *Scope) {
 /// getDICompositeType - Find underlying composite type.
 DICompositeType llvm::getDICompositeType(DIType T) {
   if (T.isCompositeType())
-    return DICompositeType(T.getNode());
+    return DICompositeType(T);
   
   if (T.isDerivedType())
-    return getDICompositeType(DIDerivedType(T.getNode()).getTypeDerivedFrom());
+    return getDICompositeType(DIDerivedType(T).getTypeDerivedFrom());
   
   return DICompositeType();
 }
diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp
index 6271371..98dbb69 100644
--- a/lib/Analysis/InlineCost.cpp
+++ b/lib/Analysis/InlineCost.cpp
@@ -175,7 +175,11 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB) {
       if (!isa<IntrinsicInst>(II) && !callIsSmall(CS.getCalledFunction())) {
         // Each argument to a call takes on average one instruction to set up.
         NumInsts += CS.arg_size();
-        ++NumCalls;
+
+        // We don't want inline asm to count as a call - that would prevent loop
+        // unrolling. The argument setup cost is still real, though.
+        if (!isa<InlineAsm>(CS.getCalledValue()))
+          ++NumCalls;
       }
     }
     
@@ -455,6 +459,11 @@ InlineCostAnalyzer::growCachedCostInfo(Function *Caller, Function *Callee) {
   else
     CallerMetrics.NumInsts = 0;
   
-  // We are not updating the argumentweights. We have already determined that
+  // We are not updating the argument weights. We have already determined that
   // Caller is a fairly large function, so we accept the loss of precision.
 }
+
+/// clear - empty the cache of inline costs
+void InlineCostAnalyzer::clear() {
+  CachedFunctionInfo.clear();
+}
diff --git a/lib/Analysis/Lint.cpp b/lib/Analysis/Lint.cpp
index 25d4f95..a031cbc 100644
--- a/lib/Analysis/Lint.cpp
+++ b/lib/Analysis/Lint.cpp
@@ -179,6 +179,7 @@ bool Lint::runOnFunction(Function &F) {
   TD = getAnalysisIfAvailable<TargetData>();
   visit(F);
   dbgs() << MessagesStr.str();
+  Messages.clear();
   return false;
 }
 
@@ -193,7 +194,6 @@ void Lint::visitCallSite(CallSite CS) {
   Instruction &I = *CS.getInstruction();
   Value *Callee = CS.getCalledValue();
 
-  // TODO: Check function alignment?
   visitMemoryReference(I, Callee, 0, 0, MemRef::Callee);
 
   if (Function *F = dyn_cast<Function>(Callee->stripPointerCasts())) {
@@ -219,7 +219,15 @@ void Lint::visitCallSite(CallSite CS) {
     // TODO: Check sret attribute.
   }
 
-  // TODO: Check the "tail" keyword constraints.
+  if (CS.isCall() && cast<CallInst>(CS.getInstruction())->isTailCall())
+    for (CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
+         AI != AE; ++AI) {
+      Value *Obj = (*AI)->getUnderlyingObject();
+      Assert1(!isa<AllocaInst>(Obj) && !isa<VAArgInst>(Obj),
+              "Undefined behavior: Call with \"tail\" keyword references "
+              "alloca or va_arg", &I);
+    }
+
 
   if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I))
     switch (II->getIntrinsicID()) {
@@ -280,8 +288,11 @@ void Lint::visitCallSite(CallSite CS) {
       break;
 
     case Intrinsic::stackrestore:
+      // Stackrestore doesn't read or write memory, but it sets the
+      // stack pointer, which the compiler may read from or write to
+      // at any time, so check it for both readability and writeability.
       visitMemoryReference(I, CS.getArgument(0), 0, 0,
-                           MemRef::Read);
+                           MemRef::Read | MemRef::Write);
       break;
     }
 }
@@ -482,14 +493,10 @@ void llvm::lintFunction(const Function &f) {
 }
 
 /// lintModule - Check a module for errors, printing messages on stderr.
-/// Return true if the module is corrupt.
 ///
-void llvm::lintModule(const Module &M, std::string *ErrorInfo) {
+void llvm::lintModule(const Module &M) {
   PassManager PM;
   Lint *V = new Lint();
   PM.add(V);
   PM.run(const_cast<Module&>(M));
-
-  if (ErrorInfo)
-    *ErrorInfo = V->MessagesStr.str();
 }
diff --git a/lib/Analysis/ModuleDebugInfoPrinter.cpp b/lib/Analysis/ModuleDebugInfoPrinter.cpp
new file mode 100644
index 0000000..556d4c8
--- /dev/null
+++ b/lib/Analysis/ModuleDebugInfoPrinter.cpp
@@ -0,0 +1,86 @@
+//===-- ModuleDebugInfoPrinter.cpp - Prints module debug info metadata ----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass decodes the debug info metadata in a module and prints in a
+// (sufficiently-prepared-) human-readable form.
+//
+// For example, run this pass from opt along with the -analyze option, and
+// it'll print to standard output.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Pass.h"
+#include "llvm/Function.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+namespace {
+  class ModuleDebugInfoPrinter : public ModulePass {
+    DebugInfoFinder Finder;
+  public:
+    static char ID; // Pass identification, replacement for typeid
+    ModuleDebugInfoPrinter() : ModulePass(&ID) {}
+
+    virtual bool runOnModule(Module &M);
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesAll();
+    }
+    virtual void print(raw_ostream &O, const Module *M) const;
+  };
+}
+
+char ModuleDebugInfoPrinter::ID = 0;
+static RegisterPass<ModuleDebugInfoPrinter>
+X("module-debuginfo",
+  "Decodes module-level debug info", false, true);
+
+ModulePass *llvm::createModuleDebugInfoPrinterPass() {
+  return new ModuleDebugInfoPrinter();
+}
+
+bool ModuleDebugInfoPrinter::runOnModule(Module &M) {
+  Finder.processModule(M);
+  return false;
+}
+
+void ModuleDebugInfoPrinter::print(raw_ostream &O, const Module *M) const {
+  for (DebugInfoFinder::iterator I = Finder.compile_unit_begin(),
+       E = Finder.compile_unit_end(); I != E; ++I) {
+    O << "Compile Unit: ";
+    DICompileUnit(*I).print(O);
+    O << '\n';
+  }
+
+  for (DebugInfoFinder::iterator I = Finder.subprogram_begin(),
+       E = Finder.subprogram_end(); I != E; ++I) {
+    O << "Subprogram: ";
+    DISubprogram(*I).print(O);
+    O << '\n';
+  }
+
+  for (DebugInfoFinder::iterator I = Finder.global_variable_begin(),
+       E = Finder.global_variable_end(); I != E; ++I) {
+    O << "GlobalVariable: ";
+    DIGlobalVariable(*I).print(O);
+    O << '\n';
+  }
+
+  for (DebugInfoFinder::iterator I = Finder.type_begin(),
+       E = Finder.type_end(); I != E; ++I) {
+    O << "Type: ";
+    DIType(*I).print(O);
+    O << '\n';
+  }
+}
diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp
index 46f3cbc..9b4370f 100644
--- a/lib/AsmParser/LLLexer.cpp
+++ b/lib/AsmParser/LLLexer.cpp
@@ -537,6 +537,7 @@ lltok::Kind LLLexer::LexIdentifier() {
   KEYWORD(coldcc);
   KEYWORD(x86_stdcallcc);
   KEYWORD(x86_fastcallcc);
+  KEYWORD(x86_thiscallcc);
   KEYWORD(arm_apcscc);
   KEYWORD(arm_aapcscc);
   KEYWORD(arm_aapcs_vfpcc);
diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp
index 3b08ca1..226d8d3 100644
--- a/lib/AsmParser/LLParser.cpp
+++ b/lib/AsmParser/LLParser.cpp
@@ -1074,6 +1074,7 @@ bool LLParser::ParseOptionalVisibility(unsigned &Res) {
 ///   ::= 'coldcc'
 ///   ::= 'x86_stdcallcc'
 ///   ::= 'x86_fastcallcc'
+///   ::= 'x86_thiscallcc'
 ///   ::= 'arm_apcscc'
 ///   ::= 'arm_aapcscc'
 ///   ::= 'arm_aapcs_vfpcc'
@@ -1088,6 +1089,7 @@ bool LLParser::ParseOptionalCallingConv(CallingConv::ID &CC) {
   case lltok::kw_coldcc:         CC = CallingConv::Cold; break;
   case lltok::kw_x86_stdcallcc:  CC = CallingConv::X86_StdCall; break;
   case lltok::kw_x86_fastcallcc: CC = CallingConv::X86_FastCall; break;
+  case lltok::kw_x86_thiscallcc: CC = CallingConv::X86_ThisCall; break;
   case lltok::kw_arm_apcscc:     CC = CallingConv::ARM_APCS; break;
   case lltok::kw_arm_aapcscc:    CC = CallingConv::ARM_AAPCS; break;
   case lltok::kw_arm_aapcs_vfpcc:CC = CallingConv::ARM_AAPCS_VFP; break;
diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h
index 3ac9169..5eed170 100644
--- a/lib/AsmParser/LLToken.h
+++ b/lib/AsmParser/LLToken.h
@@ -68,7 +68,7 @@ namespace lltok {
     kw_c,
 
     kw_cc, kw_ccc, kw_fastcc, kw_coldcc,
-    kw_x86_stdcallcc, kw_x86_fastcallcc,
+    kw_x86_stdcallcc, kw_x86_fastcallcc, kw_x86_thiscallcc,
     kw_arm_apcscc, kw_arm_aapcscc, kw_arm_aapcs_vfpcc,
     kw_msp430_intrcc,
 
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index ded4b3f..5a0c27b 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -205,16 +205,10 @@ void AsmPrinter::EmitLinkage(unsigned Linkage, MCSymbol *GVSym) const {
       OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global);
       // .weak_definition _foo
       OutStreamer.EmitSymbolAttribute(GVSym, MCSA_WeakDefinition);
-    } else if (const char *LinkOnce = MAI->getLinkOnceDirective()) {
+    } else if (MAI->getLinkOnceDirective() != 0) {
       // .globl _foo
       OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global);
-      // FIXME: linkonce should be a section attribute, handled by COFF Section
-      // assignment.
-      // http://sourceware.org/binutils/docs-2.20/as/Linkonce.html#Linkonce
-      // .linkonce discard
-      // FIXME: It would be nice to use .linkonce samesize for non-common
-      // globals.
-      OutStreamer.EmitRawText(StringRef(LinkOnce));
+      //NOTE: linkonce is handled by the section the symbol was assigned to.
     } else {
       // .weak _foo
       OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Weak);
@@ -247,6 +241,12 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
   if (EmitSpecialLLVMGlobal(GV))
     return;
 
+  if (isVerbose()) {
+    WriteAsOperand(OutStreamer.GetCommentOS(), GV,
+                   /*PrintType=*/false, GV->getParent());
+    OutStreamer.GetCommentOS() << '\n';
+  }
+  
   MCSymbol *GVSym = Mang->getSymbol(GV);
   EmitVisibility(GVSym, GV->getVisibility());
 
@@ -316,17 +316,57 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
     OutStreamer.EmitZerofill(TheSection, GVSym, Size, 1 << AlignLog);
     return;
   }
+  
+  // Handle thread local data for mach-o which requires us to output an
+  // additional structure of data and mangle the original symbol so that we
+  // can reference it later.
+  if (GVKind.isThreadLocal() && MAI->hasMachoTBSSDirective()) {
+    // Emit the .tbss symbol
+    MCSymbol *MangSym = 
+      OutContext.GetOrCreateSymbol(GVSym->getName() + Twine("$tlv$init"));
+    
+    if (GVKind.isThreadBSS())
+      OutStreamer.EmitTBSSSymbol(TheSection, MangSym, Size, 1 << AlignLog);
+    else if (GVKind.isThreadData()) {
+      OutStreamer.SwitchSection(TheSection);
+
+      EmitLinkage(GV->getLinkage(), MangSym);
+      EmitAlignment(AlignLog, GV);      
+      OutStreamer.EmitLabel(MangSym);
+      
+      EmitGlobalConstant(GV->getInitializer());
+    }
+    
+    OutStreamer.AddBlankLine();
+    
+    // Emit the variable struct for the runtime.
+    const MCSection *TLVSect 
+      = getObjFileLowering().getTLSExtraDataSection();
+      
+    OutStreamer.SwitchSection(TLVSect);
+    // Emit the linkage here.
+    EmitLinkage(GV->getLinkage(), GVSym);
+    OutStreamer.EmitLabel(GVSym);
+    
+    // Three pointers in size:
+    //   - __tlv_bootstrap - used to make sure support exists
+    //   - spare pointer, used when mapped by the runtime
+    //   - pointer to mangled symbol above with initializer
+    unsigned PtrSize = TD->getPointerSizeInBits()/8;
+    OutStreamer.EmitSymbolValue(GetExternalSymbolSymbol("__tlv_bootstrap"),
+                          PtrSize, 0);
+    OutStreamer.EmitIntValue(0, PtrSize, 0);
+    OutStreamer.EmitSymbolValue(MangSym, PtrSize, 0);
+    
+    OutStreamer.AddBlankLine();
+    return;
+  }
 
   OutStreamer.SwitchSection(TheSection);
 
   EmitLinkage(GV->getLinkage(), GVSym);
   EmitAlignment(AlignLog, GV);
 
-  if (isVerbose()) {
-    WriteAsOperand(OutStreamer.GetCommentOS(), GV,
-                   /*PrintType=*/false, GV->getParent());
-    OutStreamer.GetCommentOS() << '\n';
-  }
   OutStreamer.EmitLabel(GVSym);
 
   EmitGlobalConstant(GV->getInitializer());
@@ -408,7 +448,13 @@ void AsmPrinter::EmitFunctionHeader() {
 /// EmitFunctionEntryLabel - Emit the label that is the entrypoint for the
 /// function.  This can be overridden by targets as required to do custom stuff.
 void AsmPrinter::EmitFunctionEntryLabel() {
-  OutStreamer.EmitLabel(CurrentFnSym);
+  // The function label could have already been emitted if two symbols end up
+  // conflicting due to asm renaming.  Detect this and emit an error.
+  if (CurrentFnSym->isUndefined())
+    return OutStreamer.EmitLabel(CurrentFnSym);
+
+  report_fatal_error("'" + Twine(CurrentFnSym->getName()) +
+                     "' label emitted multiple times to assembly file");
 }
 
 
@@ -503,7 +549,7 @@ static bool EmitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) {
   // cast away const; DIetc do not take const operands for some reason.
   DIVariable V(const_cast<MDNode*>(MI->getOperand(2).getMetadata()));
   if (V.getContext().isSubprogram())
-    OS << DISubprogram(V.getContext().getNode()).getDisplayName() << ":";
+    OS << DISubprogram(V.getContext()).getDisplayName() << ":";
   OS << V.getName() << " <- ";
 
   // Register or immediate value. Register 0 means undef.
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index 37d10e5..ba6fed2 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -53,6 +53,17 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, unsigned LocCookie) const {
   }
   
   SourceMgr SrcMgr;
+
+  // Ensure the buffer is newline terminated.
+  char *TmpString = 0;
+  if (Str.back() != '\n') {
+    TmpString = new char[Str.size() + 2];
+    memcpy(TmpString, Str.data(), Str.size());
+    TmpString[Str.size()] = '\n';
+    TmpString[Str.size() + 1] = 0;
+    isNullTerminated = true;
+    Str = TmpString;
+  }
   
   // If the current LLVMContext has an inline asm handler, set it in SourceMgr.
   LLVMContext &LLVMCtx = MMI->getModule()->getContext();
@@ -84,6 +95,9 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, unsigned LocCookie) const {
                        /*NoFinalize*/ true);
   if (Res && !HasDiagHandler)
     report_fatal_error("Error parsing inline asm\n");
+
+  if (TmpString)
+    delete[] TmpString;
 }
 
 
diff --git a/lib/CodeGen/AsmPrinter/DIE.h b/lib/CodeGen/AsmPrinter/DIE.h
index 9cb8314..d56c094 100644
--- a/lib/CodeGen/AsmPrinter/DIE.h
+++ b/lib/CodeGen/AsmPrinter/DIE.h
@@ -315,6 +315,10 @@ namespace llvm {
     ///
     virtual void EmitValue(AsmPrinter *AP, unsigned Form) const;
 
+    /// getValue - Get MCSymbol.
+    ///
+    const MCSymbol *getValue()       const { return Label; }
+
     /// SizeOf - Determine size of label value in bytes.
     ///
     virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const;
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index e9e9ba5..890507c 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -47,6 +47,10 @@ static cl::opt<bool> PrintDbgScope("print-dbgscope", cl::Hidden,
 static cl::opt<bool> DisableDebugInfoPrinting("disable-debug-info-print", cl::Hidden,
      cl::desc("Disable debug info printing"));
 
+static cl::opt<bool> UnknownLocations("use-unknown-locations", cl::Hidden,
+     cl::desc("Make an absense of debug location information explicit."),
+     cl::init(false));
+
 namespace {
   const char *DWARFGroupName = "DWARF Emission";
   const char *DbgTimerName = "DWARF Debug Writer";
@@ -78,12 +82,12 @@ class CompileUnit {
   /// GVToDieMap - Tracks the mapping of unit level debug informaton
   /// variables to debug information entries.
   /// FIXME : Rename GVToDieMap -> NodeToDieMap
-  DenseMap<MDNode *, DIE *> GVToDieMap;
+  DenseMap<const MDNode *, DIE *> GVToDieMap;
 
   /// GVToDIEEntryMap - Tracks the mapping of unit level debug informaton
   /// descriptors to debug information entries using a DIEEntry proxy.
   /// FIXME : Rename
-  DenseMap<MDNode *, DIEEntry *> GVToDIEEntryMap;
+  DenseMap<const MDNode *, DIEEntry *> GVToDIEEntryMap;
 
   /// Globals - A map of globally visible named entities for this unit.
   ///
@@ -119,24 +123,24 @@ public:
 
   /// getDIE - Returns the debug information entry map slot for the
   /// specified debug variable.
-  DIE *getDIE(MDNode *N) { return GVToDieMap.lookup(N); }
+  DIE *getDIE(const MDNode *N) { return GVToDieMap.lookup(N); }
 
   /// insertDIE - Insert DIE into the map.
-  void insertDIE(MDNode *N, DIE *D) {
+  void insertDIE(const MDNode *N, DIE *D) {
     GVToDieMap.insert(std::make_pair(N, D));
   }
 
   /// getDIEEntry - Returns the debug information entry for the speciefied
   /// debug variable.
-  DIEEntry *getDIEEntry(MDNode *N) { 
-    DenseMap<MDNode *, DIEEntry *>::iterator I = GVToDIEEntryMap.find(N);
+  DIEEntry *getDIEEntry(const MDNode *N) { 
+    DenseMap<const MDNode *, DIEEntry *>::iterator I = GVToDIEEntryMap.find(N);
     if (I == GVToDIEEntryMap.end())
       return NULL;
     return I->second;
   }
 
   /// insertDIEEntry - Insert debug information entry into the map.
-  void insertDIEEntry(MDNode *N, DIEEntry *E) {
+  void insertDIEEntry(const MDNode *N, DIEEntry *E) {
     GVToDIEEntryMap.insert(std::make_pair(N, E));
   }
 
@@ -164,31 +168,18 @@ public:
 ///
 class DbgVariable {
   DIVariable Var;                    // Variable Descriptor.
-  unsigned FrameIndex;               // Variable frame index.
-  const MachineInstr *DbgValueMInsn; // DBG_VALUE
-  // DbgValueLabel - DBG_VALUE is effective from this label.
-  MCSymbol *DbgValueLabel;
-  DbgVariable *const AbstractVar;    // Abstract variable for this variable.
-  DIE *TheDIE;
+  DIE *TheDIE;                       // Variable DIE.
+  unsigned DotDebugLocOffset;        // Offset in DotDebugLocEntries.
 public:
   // AbsVar may be NULL.
-  DbgVariable(DIVariable V, unsigned I, DbgVariable *AbsVar)
-    : Var(V), FrameIndex(I), DbgValueMInsn(0), 
-      DbgValueLabel(0), AbstractVar(AbsVar), TheDIE(0) {}
-  DbgVariable(DIVariable V, const MachineInstr *MI, DbgVariable *AbsVar)
-    : Var(V), FrameIndex(0), DbgValueMInsn(MI), DbgValueLabel(0),
-      AbstractVar(AbsVar), TheDIE(0)
-    {}
+  DbgVariable(DIVariable V) : Var(V), TheDIE(0), DotDebugLocOffset(~0U) {}
 
   // Accessors.
   DIVariable getVariable()           const { return Var; }
-  unsigned getFrameIndex()           const { return FrameIndex; }
-  const MachineInstr *getDbgValue()  const { return DbgValueMInsn; }
-  MCSymbol *getDbgValueLabel()       const { return DbgValueLabel; }
-  void setDbgValueLabel(MCSymbol *L)       { DbgValueLabel = L; }
-  DbgVariable *getAbstractVariable() const { return AbstractVar; }
   void setDIE(DIE *D)                      { TheDIE = D; }
   DIE *getDIE()                      const { return TheDIE; }
+  void setDotDebugLocOffset(unsigned O)    { DotDebugLocOffset = O; }
+  unsigned getDotDebugLocOffset()    const { return DotDebugLocOffset; }
 };
 
 //===----------------------------------------------------------------------===//
@@ -204,7 +195,7 @@ class DbgScope {
   DbgScope *Parent;                   // Parent to this scope.
   DIDescriptor Desc;                  // Debug info descriptor for scope.
   // Location at which this scope is inlined.
-  AssertingVH<MDNode> InlinedAtLocation;  
+  AssertingVH<const MDNode> InlinedAtLocation;  
   bool AbstractScope;                 // Abstract Scope
   const MachineInstr *LastInsn;       // Last instruction of this scope.
   const MachineInstr *FirstInsn;      // First instruction of this scope.
@@ -217,7 +208,7 @@ class DbgScope {
   // Private state for dump()
   mutable unsigned IndentLevel;
 public:
-  DbgScope(DbgScope *P, DIDescriptor D, MDNode *I = 0)
+  DbgScope(DbgScope *P, DIDescriptor D, const MDNode *I = 0)
     : Parent(P), Desc(D), InlinedAtLocation(I), AbstractScope(false),
       LastInsn(0), FirstInsn(0),
       DFSIn(0), DFSOut(0), IndentLevel(0) {}
@@ -227,8 +218,8 @@ public:
   DbgScope *getParent()          const { return Parent; }
   void setParent(DbgScope *P)          { Parent = P; }
   DIDescriptor getDesc()         const { return Desc; }
-  MDNode *getInlinedAt()         const { return InlinedAtLocation; }
-  MDNode *getScopeNode()         const { return Desc.getNode(); }
+  const MDNode *getInlinedAt()         const { return InlinedAtLocation; }
+  const MDNode *getScopeNode()         const { return Desc; }
   const SmallVector<DbgScope *, 4> &getScopes() { return Scopes; }
   const SmallVector<DbgVariable *, 8> &getVariables() { return Variables; }
   const SmallVector<DbgRange, 4> &getRanges() { return Ranges; }
@@ -300,7 +291,7 @@ public:
 void DbgScope::dump() const {
   raw_ostream &err = dbgs();
   err.indent(IndentLevel);
-  MDNode *N = Desc.getNode();
+  const MDNode *N = Desc;
   N->dump();
   if (AbstractScope)
     err << "Abstract Scope\n";
@@ -322,15 +313,15 @@ DbgScope::~DbgScope() {
 }
 
 DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
-  : Asm(A), MMI(Asm->MMI), ModuleCU(0),
+  : Asm(A), MMI(Asm->MMI), FirstCU(0),
     AbbreviationsSet(InitAbbreviationsSetSize), 
     CurrentFnDbgScope(0), PrevLabel(NULL) {
   NextStringPoolNumber = 0;
       
   DwarfFrameSectionSym = DwarfInfoSectionSym = DwarfAbbrevSectionSym = 0;
   DwarfStrSectionSym = TextSectionSym = 0;
-  DwarfDebugRangeSectionSym = 0;
-  FunctionBeginSym = 0;
+  DwarfDebugRangeSectionSym = DwarfDebugLocSectionSym = 0; 
+  FunctionBeginSym = FunctionEndSym = 0;
   if (TimePassesIsEnabled) {
       NamedRegionTimer T(DbgTimerName, DWARFGroupName);
       beginModule(M);
@@ -444,8 +435,8 @@ void DwarfDebug::addBlock(DIE *Die, unsigned Attribute, unsigned Form,
 /// addSourceLine - Add location information to specified debug information
 /// entry.
 void DwarfDebug::addSourceLine(DIE *Die, const DIVariable *V) {
-  // If there is no compile unit specified, don't add a line #.
-  if (!V->getCompileUnit().Verify())
+  // Verify variable.
+  if (!V->Verify())
     return;
 
   unsigned Line = V->getLineNumber();
@@ -458,9 +449,9 @@ void DwarfDebug::addSourceLine(DIE *Die, const DIVariable *V) {
 
 /// addSourceLine - Add location information to specified debug information
 /// entry.
-void DwarfDebug::addSourceLine(DIE *Die, const DIGlobal *G) {
-  // If there is no compile unit specified, don't add a line #.
-  if (!G->getCompileUnit().Verify())
+void DwarfDebug::addSourceLine(DIE *Die, const DIGlobalVariable *G) {
+  // Verify global variable.
+  if (!G->Verify())
     return;
 
   unsigned Line = G->getLineNumber();
@@ -474,8 +465,8 @@ void DwarfDebug::addSourceLine(DIE *Die, const DIGlobal *G) {
 /// addSourceLine - Add location information to specified debug information
 /// entry.
 void DwarfDebug::addSourceLine(DIE *Die, const DISubprogram *SP) {
-  // If there is no compile unit specified, don't add a line #.
-  if (!SP->getCompileUnit().Verify())
+  // Verify subprogram.
+  if (!SP->Verify())
     return;
   // If the line number is 0, don't add it.
   if (SP->getLineNumber() == 0)
@@ -494,9 +485,8 @@ void DwarfDebug::addSourceLine(DIE *Die, const DISubprogram *SP) {
 /// addSourceLine - Add location information to specified debug information
 /// entry.
 void DwarfDebug::addSourceLine(DIE *Die, const DIType *Ty) {
-  // If there is no compile unit specified, don't add a line #.
-  DICompileUnit CU = Ty->getCompileUnit();
-  if (!CU.Verify())
+  // Verify type.
+  if (!Ty->Verify())
     return;
 
   unsigned Line = Ty->getLineNumber();
@@ -512,8 +502,8 @@ void DwarfDebug::addSourceLine(DIE *Die, const DIType *Ty) {
 /// addSourceLine - Add location information to specified debug information
 /// entry.
 void DwarfDebug::addSourceLine(DIE *Die, const DINameSpace *NS) {
-  // If there is no compile unit specified, don't add a line #.
-  if (!NS->getCompileUnit().Verify())
+  // Verify namespace.
+  if (!NS->Verify())
     return;
 
   unsigned Line = NS->getLineNumber();
@@ -560,16 +550,16 @@ DIType DwarfDebug::getBlockByrefType(DIType Ty, std::string Name) {
   unsigned tag = Ty.getTag();
 
   if (tag == dwarf::DW_TAG_pointer_type) {
-    DIDerivedType DTy = DIDerivedType(Ty.getNode());
+    DIDerivedType DTy = DIDerivedType(Ty);
     subType = DTy.getTypeDerivedFrom();
   }
 
-  DICompositeType blockStruct = DICompositeType(subType.getNode());
+  DICompositeType blockStruct = DICompositeType(subType);
   DIArray Elements = blockStruct.getTypeArray();
 
   for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
     DIDescriptor Element = Elements.getElement(i);
-    DIDerivedType DT = DIDerivedType(Element.getNode());
+    DIDerivedType DT = DIDerivedType(Element);
     if (Name == DT.getName())
       return (DT.getTypeDerivedFrom());
   }
@@ -700,12 +690,12 @@ void DwarfDebug::addBlockByrefAddress(DbgVariable *&DV, DIE *Die,
   StringRef varName = VD.getName();
 
   if (Tag == dwarf::DW_TAG_pointer_type) {
-    DIDerivedType DTy = DIDerivedType(Ty.getNode());
+    DIDerivedType DTy = DIDerivedType(Ty);
     TmpTy = DTy.getTypeDerivedFrom();
     isPointer = true;
   }
 
-  DICompositeType blockStruct = DICompositeType(TmpTy.getNode());
+  DICompositeType blockStruct = DICompositeType(TmpTy);
 
   // Find the __forwarding field and the variable field in the __Block_byref
   // struct.
@@ -715,7 +705,7 @@ void DwarfDebug::addBlockByrefAddress(DbgVariable *&DV, DIE *Die,
 
   for (unsigned i = 0, N = Fields.getNumElements(); i < N; ++i) {
     DIDescriptor Element = Fields.getElement(i);
-    DIDerivedType DT = DIDerivedType(Element.getNode());
+    DIDerivedType DT = DIDerivedType(Element);
     StringRef fieldName = DT.getName();
     if (fieldName == "__forwarding")
       forwardingField = Element;
@@ -725,9 +715,9 @@ void DwarfDebug::addBlockByrefAddress(DbgVariable *&DV, DIE *Die,
 
   // Get the offsets for the forwarding field and the variable field.
   unsigned forwardingFieldOffset =
-    DIDerivedType(forwardingField.getNode()).getOffsetInBits() >> 3;
+    DIDerivedType(forwardingField).getOffsetInBits() >> 3;
   unsigned varFieldOffset =
-    DIDerivedType(varField.getNode()).getOffsetInBits() >> 3;
+    DIDerivedType(varField).getOffsetInBits() >> 3;
 
   // Decode the original location, and use that as the start of the byref
   // variable's location.
@@ -813,7 +803,7 @@ void DwarfDebug::addAddress(DIE *Die, unsigned Attribute,
 }
 
 /// addRegisterAddress - Add register location entry in variable DIE.
-bool DwarfDebug::addRegisterAddress(DIE *Die, DbgVariable *DV,
+bool DwarfDebug::addRegisterAddress(DIE *Die, const MCSymbol *VS,
                                     const MachineOperand &MO) {
   assert (MO.isReg() && "Invalid machine operand!");
   if (!MO.getReg())
@@ -821,26 +811,26 @@ bool DwarfDebug::addRegisterAddress(DIE *Die, DbgVariable *DV,
   MachineLocation Location;
   Location.set(MO.getReg());
   addAddress(Die, dwarf::DW_AT_location, Location);
-  if (MCSymbol *VS = DV->getDbgValueLabel())
+  if (VS)
     addLabel(Die, dwarf::DW_AT_start_scope, dwarf::DW_FORM_addr, VS);
   return true;
 }
 
 /// addConstantValue - Add constant value entry in variable DIE.
-bool DwarfDebug::addConstantValue(DIE *Die, DbgVariable *DV, 
+bool DwarfDebug::addConstantValue(DIE *Die, const MCSymbol *VS,
                                   const MachineOperand &MO) {
   assert (MO.isImm() && "Invalid machine operand!");
   DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
   unsigned Imm = MO.getImm();
   addUInt(Block, 0, dwarf::DW_FORM_udata, Imm);
   addBlock(Die, dwarf::DW_AT_const_value, 0, Block);
-  if (MCSymbol *VS = DV->getDbgValueLabel())
+  if (VS)
     addLabel(Die, dwarf::DW_AT_start_scope, dwarf::DW_FORM_addr, VS);
   return true;
 }
 
 /// addConstantFPValue - Add constant value entry in variable DIE.
-bool DwarfDebug::addConstantFPValue(DIE *Die, DbgVariable *DV, 
+bool DwarfDebug::addConstantFPValue(DIE *Die, const MCSymbol *VS,
                                     const MachineOperand &MO) {
   assert (MO.isFPImm() && "Invalid machine operand!");
   DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
@@ -862,10 +852,8 @@ bool DwarfDebug::addConstantFPValue(DIE *Die, DbgVariable *DV,
             (unsigned char)0xFF & FltPtr[Start]);
   
   addBlock(Die, dwarf::DW_AT_const_value, 0, Block);
-  
-  if (MCSymbol *VS = DV->getDbgValueLabel())
-    addLabel(Die, dwarf::DW_AT_start_scope, dwarf::DW_FORM_addr,
-             VS);
+  if (VS)
+    addLabel(Die, dwarf::DW_AT_start_scope, dwarf::DW_FORM_addr, VS);
   return true; 
 }
 
@@ -873,34 +861,35 @@ bool DwarfDebug::addConstantFPValue(DIE *Die, DbgVariable *DV,
 /// addToContextOwner - Add Die into the list of its context owner's children.
 void DwarfDebug::addToContextOwner(DIE *Die, DIDescriptor Context) {
   if (Context.isType()) {
-    DIE *ContextDIE = getOrCreateTypeDIE(DIType(Context.getNode()));
+    DIE *ContextDIE = getOrCreateTypeDIE(DIType(Context));
     ContextDIE->addChild(Die);
   } else if (Context.isNameSpace()) {
-    DIE *ContextDIE = getOrCreateNameSpace(DINameSpace(Context.getNode()));
+    DIE *ContextDIE = getOrCreateNameSpace(DINameSpace(Context));
     ContextDIE->addChild(Die);
-  } else if (DIE *ContextDIE = ModuleCU->getDIE(Context.getNode()))
+  } else if (DIE *ContextDIE = getCompileUnit(Context)->getDIE(Context))
     ContextDIE->addChild(Die);
   else 
-    ModuleCU->addDie(Die);
+    getCompileUnit(Context)->addDie(Die);
 }
 
 /// getOrCreateTypeDIE - Find existing DIE or create new DIE for the
 /// given DIType.
 DIE *DwarfDebug::getOrCreateTypeDIE(DIType Ty) {
-  DIE *TyDIE = ModuleCU->getDIE(Ty.getNode());
+  CompileUnit *TypeCU = getCompileUnit(Ty);
+  DIE *TyDIE = TypeCU->getDIE(Ty);
   if (TyDIE)
     return TyDIE;
 
   // Create new type.
   TyDIE = new DIE(dwarf::DW_TAG_base_type);
-  ModuleCU->insertDIE(Ty.getNode(), TyDIE);
+  TypeCU->insertDIE(Ty, TyDIE);
   if (Ty.isBasicType())
-    constructTypeDIE(*TyDIE, DIBasicType(Ty.getNode()));
+    constructTypeDIE(*TyDIE, DIBasicType(Ty));
   else if (Ty.isCompositeType())
-    constructTypeDIE(*TyDIE, DICompositeType(Ty.getNode()));
+    constructTypeDIE(*TyDIE, DICompositeType(Ty));
   else {
     assert(Ty.isDerivedType() && "Unknown kind of DIType");
-    constructTypeDIE(*TyDIE, DIDerivedType(Ty.getNode()));
+    constructTypeDIE(*TyDIE, DIDerivedType(Ty));
   }
 
   addToContextOwner(TyDIE, Ty.getContext());
@@ -909,11 +898,12 @@ DIE *DwarfDebug::getOrCreateTypeDIE(DIType Ty) {
 
 /// addType - Add a new type attribute to the specified entity.
 void DwarfDebug::addType(DIE *Entity, DIType Ty) {
-  if (!Ty.isValid())
+  if (!Ty.Verify())
     return;
 
   // Check for pre-existence.
-  DIEEntry *Entry = ModuleCU->getDIEEntry(Ty.getNode());
+  CompileUnit *TypeCU = getCompileUnit(Ty);
+  DIEEntry *Entry = TypeCU->getDIEEntry(Ty);
   // If it exists then use the existing value.
   if (Entry) {
     Entity->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, Entry);
@@ -925,7 +915,7 @@ void DwarfDebug::addType(DIE *Entity, DIType Ty) {
 
   // Set up proxy.
   Entry = createDIEEntry(Buffer);
-  ModuleCU->insertDIEEntry(Ty.getNode(), Entry);
+  TypeCU->insertDIEEntry(Ty, Entry);
 
   Entity->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, Entry);
 }
@@ -994,9 +984,9 @@ void DwarfDebug::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
     // Add enumerators to enumeration type.
     for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
       DIE *ElemDie = NULL;
-      DIDescriptor Enum(Elements.getElement(i).getNode());
+      DIDescriptor Enum(Elements.getElement(i));
       if (Enum.isEnumerator()) {
-        ElemDie = constructEnumTypeDIE(DIEnumerator(Enum.getNode()));
+        ElemDie = constructEnumTypeDIE(DIEnumerator(Enum));
         Buffer.addChild(ElemDie);
       }
     }
@@ -1006,7 +996,7 @@ void DwarfDebug::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
     // Add return type.
     DIArray Elements = CTy.getTypeArray();
     DIDescriptor RTy = Elements.getElement(0);
-    addType(&Buffer, DIType(RTy.getNode()));
+    addType(&Buffer, DIType(RTy));
 
     // Add prototype flag.
     addUInt(&Buffer, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1);
@@ -1015,7 +1005,7 @@ void DwarfDebug::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
     for (unsigned i = 1, N = Elements.getNumElements(); i < N; ++i) {
       DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter);
       DIDescriptor Ty = Elements.getElement(i);
-      addType(Arg, DIType(Ty.getNode()));
+      addType(Arg, DIType(Ty));
       Buffer.addChild(Arg);
     }
   }
@@ -1036,9 +1026,9 @@ void DwarfDebug::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
       DIDescriptor Element = Elements.getElement(i);
       DIE *ElemDie = NULL;
       if (Element.isSubprogram())
-        ElemDie = createSubprogramDIE(DISubprogram(Element.getNode()));
+        ElemDie = createSubprogramDIE(DISubprogram(Element));
       else if (Element.isVariable()) {
-        DIVariable DV(Element.getNode());
+        DIVariable DV(Element);
         ElemDie = new DIE(dwarf::DW_TAG_variable);
         addString(ElemDie, dwarf::DW_AT_name, dwarf::DW_FORM_string,
                   DV.getName());
@@ -1047,7 +1037,7 @@ void DwarfDebug::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
         addUInt(ElemDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1);
         addSourceLine(ElemDie, &DV);
       } else if (Element.isDerivedType())
-        ElemDie = createMemberDIE(DIDerivedType(Element.getNode()));
+        ElemDie = createMemberDIE(DIDerivedType(Element));
       else
         continue;
       Buffer.addChild(ElemDie);
@@ -1062,9 +1052,9 @@ void DwarfDebug::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
               dwarf::DW_FORM_data1, RLang);
 
     DICompositeType ContainingType = CTy.getContainingType();
-    if (DIDescriptor(ContainingType.getNode()).isCompositeType())
+    if (DIDescriptor(ContainingType).isCompositeType())
       addDIEEntry(&Buffer, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, 
-                  getOrCreateTypeDIE(DIType(ContainingType.getNode())));
+                  getOrCreateTypeDIE(DIType(ContainingType)));
     break;
   }
   default:
@@ -1120,22 +1110,23 @@ void DwarfDebug::constructArrayTypeDIE(DIE &Buffer,
   DIArray Elements = CTy->getTypeArray();
 
   // Get an anonymous type for index type.
-  DIE *IdxTy = ModuleCU->getIndexTyDie();
+  CompileUnit *TheCU = getCompileUnit(*CTy);
+  DIE *IdxTy = TheCU->getIndexTyDie();
   if (!IdxTy) {
     // Construct an anonymous type for index type.
     IdxTy = new DIE(dwarf::DW_TAG_base_type);
     addUInt(IdxTy, dwarf::DW_AT_byte_size, 0, sizeof(int32_t));
     addUInt(IdxTy, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
             dwarf::DW_ATE_signed);
-    ModuleCU->addDie(IdxTy);
-    ModuleCU->setIndexTyDie(IdxTy);
+    TheCU->addDie(IdxTy);
+    TheCU->setIndexTyDie(IdxTy);
   }
 
   // Add subranges to array type.
   for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
     DIDescriptor Element = Elements.getElement(i);
     if (Element.getTag() == dwarf::DW_TAG_subrange_type)
-      constructSubrangeDIE(Buffer, DISubrange(Element.getNode()), IdxTy);
+      constructSubrangeDIE(Buffer, DISubrange(Element), IdxTy);
   }
 }
 
@@ -1262,7 +1253,8 @@ DIE *DwarfDebug::createMemberDIE(const DIDerivedType &DT) {
 
 /// createSubprogramDIE - Create new DIE using SP.
 DIE *DwarfDebug::createSubprogramDIE(const DISubprogram &SP, bool MakeDecl) {
-  DIE *SPDie = ModuleCU->getDIE(SP.getNode());
+  CompileUnit *SPCU = getCompileUnit(SP);
+  DIE *SPDie = SPCU->getDIE(SP);
   if (SPDie)
     return SPDie;
 
@@ -1292,7 +1284,7 @@ DIE *DwarfDebug::createSubprogramDIE(const DISubprogram &SP, bool MakeDecl) {
   if (Args.getNumElements() == 0 || SPTag != dwarf::DW_TAG_subroutine_type)
     addType(SPDie, SPTy);
   else
-    addType(SPDie, DIType(Args.getElement(0).getNode()));
+    addType(SPDie, DIType(Args.getElement(0)));
 
   unsigned VK = SP.getVirtuality();
   if (VK) {
@@ -1302,7 +1294,7 @@ DIE *DwarfDebug::createSubprogramDIE(const DISubprogram &SP, bool MakeDecl) {
     addUInt(Block, 0, dwarf::DW_FORM_data1, SP.getVirtualIndex());
     addBlock(SPDie, dwarf::DW_AT_vtable_elem_location, 0, Block);
     ContainingTypeMap.insert(std::make_pair(SPDie, 
-                                            SP.getContainingType().getNode()));
+                                            SP.getContainingType()));
   }
 
   if (MakeDecl || !SP.isDefinition()) {
@@ -1317,7 +1309,7 @@ DIE *DwarfDebug::createSubprogramDIE(const DISubprogram &SP, bool MakeDecl) {
     if (SPTag == dwarf::DW_TAG_subroutine_type)
       for (unsigned i = 1, N =  Args.getNumElements(); i < N; ++i) {
         DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter);
-        DIType ATy = DIType(DIType(Args.getElement(i).getNode()));
+        DIType ATy = DIType(DIType(Args.getElement(i)));
         addType(Arg, ATy);
         if (ATy.isArtificial())
           addUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1);
@@ -1335,12 +1327,12 @@ DIE *DwarfDebug::createSubprogramDIE(const DISubprogram &SP, bool MakeDecl) {
     addUInt(SPDie, dwarf::DW_AT_APPLE_optimized, dwarf::DW_FORM_flag, 1);
 
   // DW_TAG_inlined_subroutine may refer to this DIE.
-  ModuleCU->insertDIE(SP.getNode(), SPDie);
+  SPCU->insertDIE(SP, SPDie);
 
   return SPDie;
 }
 
-DbgScope *DwarfDebug::getOrCreateAbstractScope(MDNode *N) {
+DbgScope *DwarfDebug::getOrCreateAbstractScope(const MDNode *N) {
   assert(N && "Invalid Scope encoding!");
 
   DbgScope *AScope = AbstractScopes.lookup(N);
@@ -1353,7 +1345,7 @@ DbgScope *DwarfDebug::getOrCreateAbstractScope(MDNode *N) {
   if (Scope.isLexicalBlock()) {
     DILexicalBlock DB(N);
     DIDescriptor ParentDesc = DB.getContext();
-    Parent = getOrCreateAbstractScope(ParentDesc.getNode());
+    Parent = getOrCreateAbstractScope(ParentDesc);
   }
 
   AScope = new DbgScope(Parent, DIDescriptor(N), NULL);
@@ -1369,14 +1361,14 @@ DbgScope *DwarfDebug::getOrCreateAbstractScope(MDNode *N) {
 
 /// isSubprogramContext - Return true if Context is either a subprogram
 /// or another context nested inside a subprogram.
-static bool isSubprogramContext(MDNode *Context) {
+static bool isSubprogramContext(const MDNode *Context) {
   if (!Context)
     return false;
   DIDescriptor D(Context);
   if (D.isSubprogram())
     return true;
   if (D.isType())
-    return isSubprogramContext(DIType(Context).getContext().getNode());
+    return isSubprogramContext(DIType(Context).getContext());
   return false;
 }
 
@@ -1384,8 +1376,9 @@ static bool isSubprogramContext(MDNode *Context) {
 /// attach appropriate DW_AT_low_pc and DW_AT_high_pc attributes.
 /// If there are global variables in this scope then create and insert
 /// DIEs for these variables.
-DIE *DwarfDebug::updateSubprogramScopeDIE(MDNode *SPNode) {
-  DIE *SPDie = ModuleCU->getDIE(SPNode);
+DIE *DwarfDebug::updateSubprogramScopeDIE(const MDNode *SPNode) {
+  CompileUnit *SPCU = getCompileUnit(SPNode);
+  DIE *SPDie = SPCU->getDIE(SPNode);
   assert(SPDie && "Unable to find subprogram DIE!");
   DISubprogram SP(SPNode);
   
@@ -1396,7 +1389,7 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(MDNode *SPNode) {
   // specification DIE for a function defined inside a function.
   if (SP.isDefinition() && !SP.getContext().isCompileUnit() &&
       !SP.getContext().isFile() && 
-      !isSubprogramContext(SP.getContext().getNode())) {
+      !isSubprogramContext(SP.getContext())) {
     addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
     
     // Add arguments. 
@@ -1406,7 +1399,7 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(MDNode *SPNode) {
     if (SPTag == dwarf::DW_TAG_subroutine_type)
       for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) {
         DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter);
-        DIType ATy = DIType(DIType(Args.getElement(i).getNode()));
+        DIType ATy = DIType(DIType(Args.getElement(i)));
         addType(Arg, ATy);
         if (ATy.isArtificial())
           addUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1);
@@ -1416,7 +1409,7 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(MDNode *SPNode) {
     SPDie = new DIE(dwarf::DW_TAG_subprogram);
     addDIEEntry(SPDie, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4, 
                 SPDeclDie);
-    ModuleCU->addDie(SPDie);
+    SPCU->addDie(SPDie);
   }
   
   addLabel(SPDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
@@ -1451,18 +1444,18 @@ DIE *DwarfDebug::constructLexicalScopeDIE(DbgScope *Scope) {
             DebugRangeSymbols.size() * Asm->getTargetData().getPointerSize());
     for (SmallVector<DbgRange, 4>::const_iterator RI = Ranges.begin(),
          RE = Ranges.end(); RI != RE; ++RI) {
-      DebugRangeSymbols.push_back(LabelsBeforeInsn.lookup(RI->first));
-      DebugRangeSymbols.push_back(LabelsAfterInsn.lookup(RI->second));
+      DebugRangeSymbols.push_back(getLabelBeforeInsn(RI->first));
+      DebugRangeSymbols.push_back(getLabelAfterInsn(RI->second));
     }
     DebugRangeSymbols.push_back(NULL);
     DebugRangeSymbols.push_back(NULL);
     return ScopeDIE;
   }
 
-  MCSymbol *Start = LabelsBeforeInsn.lookup(RI->first);
-  MCSymbol *End = LabelsAfterInsn.lookup(RI->second);
+  const MCSymbol *Start = getLabelBeforeInsn(RI->first);
+  const MCSymbol *End = getLabelAfterInsn(RI->second);
 
-  if (Start == 0 || End == 0) return 0;
+  if (End == 0) return 0;
 
   assert(Start->isDefined() && "Invalid starting label for an inlined scope!");
   assert(End->isDefined() && "Invalid end label for an inlined scope!");
@@ -1487,10 +1480,10 @@ DIE *DwarfDebug::constructInlinedScopeDIE(DbgScope *Scope) {
   // For now, use first instruction range and emit low_pc/high_pc pair and
   // corresponding .debug_inlined section entry for this pair.
   SmallVector<DbgRange, 4>::const_iterator RI = Ranges.begin();
-  MCSymbol *StartLabel = LabelsBeforeInsn.lookup(RI->first);
-  MCSymbol *EndLabel = LabelsAfterInsn.lookup(RI->second);
+  const MCSymbol *StartLabel = getLabelBeforeInsn(RI->first);
+  const MCSymbol *EndLabel = getLabelAfterInsn(RI->second);
 
-  if (StartLabel == 0 || EndLabel == 0) {
+  if (StartLabel == FunctionBeginSym || EndLabel == 0) {
     assert (0 && "Unexpected Start and End  labels for a inlined scope!");
     return 0;
   }
@@ -1504,8 +1497,9 @@ DIE *DwarfDebug::constructInlinedScopeDIE(DbgScope *Scope) {
   DIScope DS(Scope->getScopeNode());
   DIE *ScopeDIE = new DIE(dwarf::DW_TAG_inlined_subroutine);
 
-  DISubprogram InlinedSP = getDISubprogram(DS.getNode());
-  DIE *OriginDIE = ModuleCU->getDIE(InlinedSP.getNode());
+  DISubprogram InlinedSP = getDISubprogram(DS);
+  CompileUnit *TheCU = getCompileUnit(InlinedSP);
+  DIE *OriginDIE = TheCU->getDIE(InlinedSP);
   assert(OriginDIE && "Unable to find Origin DIE!");
   addDIEEntry(ScopeDIE, dwarf::DW_AT_abstract_origin,
               dwarf::DW_FORM_ref4, OriginDIE);
@@ -1516,18 +1510,18 @@ DIE *DwarfDebug::constructInlinedScopeDIE(DbgScope *Scope) {
   InlinedSubprogramDIEs.insert(OriginDIE);
 
   // Track the start label for this inlined function.
-  DenseMap<MDNode *, SmallVector<InlineInfoLabels, 4> >::iterator
-    I = InlineInfo.find(InlinedSP.getNode());
+  DenseMap<const MDNode *, SmallVector<InlineInfoLabels, 4> >::iterator
+    I = InlineInfo.find(InlinedSP);
 
   if (I == InlineInfo.end()) {
-    InlineInfo[InlinedSP.getNode()].push_back(std::make_pair(StartLabel,
+    InlineInfo[InlinedSP].push_back(std::make_pair(StartLabel,
                                                              ScopeDIE));
-    InlinedSPNodes.push_back(InlinedSP.getNode());
+    InlinedSPNodes.push_back(InlinedSP);
   } else
     I->second.push_back(std::make_pair(StartLabel, ScopeDIE));
 
   DILocation DL(Scope->getInlinedAt());
-  addUInt(ScopeDIE, dwarf::DW_AT_call_file, 0, ModuleCU->getID());
+  addUInt(ScopeDIE, dwarf::DW_AT_call_file, 0, TheCU->getID());
   addUInt(ScopeDIE, dwarf::DW_AT_call_line, 0, DL.getLineNumber());
 
   return ScopeDIE;
@@ -1560,22 +1554,15 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) {
   // Define variable debug information entry.
   DIE *VariableDie = new DIE(Tag);
 
-
   DIE *AbsDIE = NULL;
-  if (DbgVariable *AV = DV->getAbstractVariable())
-    AbsDIE = AV->getDIE();
-
-  if (AbsDIE) {
-    DIScope DS(Scope->getScopeNode());
-    DISubprogram InlinedSP = getDISubprogram(DS.getNode());
-    DIE *OriginSPDIE = ModuleCU->getDIE(InlinedSP.getNode());
-    (void) OriginSPDIE;
-    assert(OriginSPDIE && "Unable to find Origin DIE for the SP!");
-    DIE *AbsDIE = DV->getAbstractVariable()->getDIE();
-    assert(AbsDIE && "Unable to find Origin DIE for the Variable!");
+  DenseMap<const DbgVariable *, const DbgVariable *>::iterator
+    V2AVI = VarToAbstractVarMap.find(DV);
+  if (V2AVI != VarToAbstractVarMap.end())
+    AbsDIE = V2AVI->second->getDIE();
+
+  if (AbsDIE)
     addDIEEntry(VariableDie, dwarf::DW_AT_abstract_origin,
                 dwarf::DW_FORM_ref4, AbsDIE);
-  }
   else {
     addString(VariableDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
     addSourceLine(VariableDie, &VD);
@@ -1589,55 +1576,76 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) {
       addType(VariableDie, VD.getType());
   }
 
+  if (Tag == dwarf::DW_TAG_formal_parameter && VD.getType().isArtificial())
+    addUInt(VariableDie, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1);
+
+  if (Scope->isAbstractScope()) {
+    DV->setDIE(VariableDie);
+    return VariableDie;
+  }
+
   // Add variable address.
-  if (!Scope->isAbstractScope()) {
-    // Check if variable is described by DBG_VALUE instruction.
-    if (const MachineInstr *DVInsn = DV->getDbgValue()) {
-      bool updated = false;
-      // FIXME : Handle getNumOperands != 3 
-      if (DVInsn->getNumOperands() == 3) {
-        if (DVInsn->getOperand(0).isReg())
-          updated = addRegisterAddress(VariableDie, DV, DVInsn->getOperand(0));
-        else if (DVInsn->getOperand(0).isImm())
-          updated = addConstantValue(VariableDie, DV, DVInsn->getOperand(0));
-        else if (DVInsn->getOperand(0).isFPImm()) 
-          updated = addConstantFPValue(VariableDie, DV, DVInsn->getOperand(0));
-      } else {
-        MachineLocation Location = Asm->getDebugValueLocation(DVInsn);
-        if (Location.getReg()) {
-          addAddress(VariableDie, dwarf::DW_AT_location, Location);
-          if (MCSymbol *VS = DV->getDbgValueLabel())
-            addLabel(VariableDie, dwarf::DW_AT_start_scope, dwarf::DW_FORM_addr,
-                     VS);
-          updated = true;
-        }
-      }
-      if (!updated) {
-        // If variableDie is not updated then DBG_VALUE instruction does not
-        // have valid variable info.
-        delete VariableDie;
-        return NULL;
-      }
-    } 
-    else {
-      MachineLocation Location;
-      unsigned FrameReg;
-      const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
-      int Offset = RI->getFrameIndexReference(*Asm->MF, DV->getFrameIndex(),
-                                              FrameReg);
-      Location.set(FrameReg, Offset);
-      
-      if (VD.hasComplexAddress())
-        addComplexAddress(DV, VariableDie, dwarf::DW_AT_location, Location);
-      else if (VD.isBlockByrefVariable())
-        addBlockByrefAddress(DV, VariableDie, dwarf::DW_AT_location, Location);
-      else
+
+  unsigned Offset = DV->getDotDebugLocOffset();
+  if (Offset != ~0U) {
+    addLabel(VariableDie, dwarf::DW_AT_location, dwarf::DW_FORM_data4,
+             Asm->GetTempSymbol("debug_loc", Offset));
+    DV->setDIE(VariableDie);
+    UseDotDebugLocEntry.insert(VariableDie);
+    return VariableDie;
+  }
+
+  // Check if variable is described by a  DBG_VALUE instruction.
+  DenseMap<const DbgVariable *, const MachineInstr *>::iterator DVI =
+    DbgVariableToDbgInstMap.find(DV);
+  if (DVI != DbgVariableToDbgInstMap.end()) {
+    const MachineInstr *DVInsn = DVI->second;
+    const MCSymbol *DVLabel = findVariableLabel(DV);
+    bool updated = false;
+    // FIXME : Handle getNumOperands != 3 
+    if (DVInsn->getNumOperands() == 3) {
+      if (DVInsn->getOperand(0).isReg())
+        updated = addRegisterAddress(VariableDie, DVLabel, DVInsn->getOperand(0));
+      else if (DVInsn->getOperand(0).isImm())
+        updated = addConstantValue(VariableDie, DVLabel, DVInsn->getOperand(0));
+      else if (DVInsn->getOperand(0).isFPImm()) 
+        updated = addConstantFPValue(VariableDie, DVLabel, DVInsn->getOperand(0));
+    } else {
+      MachineLocation Location = Asm->getDebugValueLocation(DVInsn);
+      if (Location.getReg()) {
         addAddress(VariableDie, dwarf::DW_AT_location, Location);
+        if (DVLabel)
+          addLabel(VariableDie, dwarf::DW_AT_start_scope, dwarf::DW_FORM_addr,
+                   DVLabel);
+        updated = true;
+      }
     }
-  }
+    if (!updated) {
+      // If variableDie is not updated then DBG_VALUE instruction does not
+      // have valid variable info.
+      delete VariableDie;
+      return NULL;
+    }
+    DV->setDIE(VariableDie);
+    return VariableDie;
+  } 
 
-  if (Tag == dwarf::DW_TAG_formal_parameter && VD.getType().isArtificial())
-    addUInt(VariableDie, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1);
+  // .. else use frame index, if available.
+  MachineLocation Location;
+  unsigned FrameReg;
+  const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
+  int FI = 0;
+  if (findVariableFrameIndex(DV, &FI)) {
+    int Offset = RI->getFrameIndexReference(*Asm->MF, FI, FrameReg);
+    Location.set(FrameReg, Offset);
+    
+    if (VD.hasComplexAddress())
+      addComplexAddress(DV, VariableDie, dwarf::DW_AT_location, Location);
+    else if (VD.isBlockByrefVariable())
+      addBlockByrefAddress(DV, VariableDie, dwarf::DW_AT_location, Location);
+    else
+      addAddress(VariableDie, dwarf::DW_AT_location, Location);
+  }
   DV->setDIE(VariableDie);
   return VariableDie;
 
@@ -1651,14 +1659,15 @@ void DwarfDebug::addPubTypes(DISubprogram SP) {
 
   DIArray Args = SPTy.getTypeArray();
   for (unsigned i = 0, e = Args.getNumElements(); i != e; ++i) {
-    DIType ATy(Args.getElement(i).getNode());
-    if (!ATy.isValid())
+    DIType ATy(Args.getElement(i));
+    if (!ATy.Verify())
       continue;
     DICompositeType CATy = getDICompositeType(ATy);
-    if (DIDescriptor(CATy.getNode()).Verify() && !CATy.getName().empty()
+    if (DIDescriptor(CATy).Verify() && !CATy.getName().empty()
         && !CATy.isForwardDecl()) {
-      if (DIEEntry *Entry = ModuleCU->getDIEEntry(CATy.getNode()))
-        ModuleCU->addGlobalType(CATy.getName(), Entry->getEntry());
+      CompileUnit *TheCU = getCompileUnit(CATy);
+      if (DIEEntry *Entry = TheCU->getDIEEntry(CATy))
+        TheCU->addGlobalType(CATy.getName(), Entry->getEntry());
     }
   }
 }
@@ -1674,9 +1683,9 @@ DIE *DwarfDebug::constructScopeDIE(DbgScope *Scope) {
     ScopeDIE = constructInlinedScopeDIE(Scope);
   else if (DS.isSubprogram()) {
     if (Scope->isAbstractScope())
-      ScopeDIE = ModuleCU->getDIE(DS.getNode());
+      ScopeDIE = getCompileUnit(DS)->getDIE(DS);
     else
-      ScopeDIE = updateSubprogramScopeDIE(DS.getNode());
+      ScopeDIE = updateSubprogramScopeDIE(DS);
   }
   else
     ScopeDIE = constructLexicalScopeDIE(Scope);
@@ -1700,7 +1709,7 @@ DIE *DwarfDebug::constructScopeDIE(DbgScope *Scope) {
   }
 
   if (DS.isSubprogram()) 
-    addPubTypes(DISubprogram(DS.getNode()));
+    addPubTypes(DISubprogram(DS));
  
  return ScopeDIE;
 }
@@ -1744,11 +1753,12 @@ unsigned DwarfDebug::GetOrCreateSourceID(StringRef DirName, StringRef FileName){
 
 /// getOrCreateNameSpace - Create a DIE for DINameSpace.
 DIE *DwarfDebug::getOrCreateNameSpace(DINameSpace NS) {
-  DIE *NDie = ModuleCU->getDIE(NS.getNode());
+  CompileUnit *TheCU = getCompileUnit(NS);
+  DIE *NDie = TheCU->getDIE(NS);
   if (NDie)
     return NDie;
   NDie = new DIE(dwarf::DW_TAG_namespace);
-  ModuleCU->insertDIE(NS.getNode(), NDie);
+  TheCU->insertDIE(NS, NDie);
   if (!NS.getName().empty())
     addString(NDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, NS.getName());
   addSourceLine(NDie, &NS);
@@ -1756,12 +1766,10 @@ DIE *DwarfDebug::getOrCreateNameSpace(DINameSpace NS) {
   return NDie;
 }
 
-void DwarfDebug::constructCompileUnit(MDNode *N) {
+/// constructCompileUnit - Create new CompileUnit for the given 
+/// metadata node with tag DW_TAG_compile_unit.
+void DwarfDebug::constructCompileUnit(const MDNode *N) {
   DICompileUnit DIUnit(N);
-  // Use first compile unit marked as isMain as the compile unit for this
-  // module.
-  if (ModuleCU || !DIUnit.isMain())
-    return;
   StringRef FN = DIUnit.getFilename();
   StringRef Dir = DIUnit.getDirectory();
   unsigned ID = GetOrCreateSourceID(Dir, FN);
@@ -1794,12 +1802,44 @@ void DwarfDebug::constructCompileUnit(MDNode *N) {
     addUInt(Die, dwarf::DW_AT_APPLE_major_runtime_vers,
             dwarf::DW_FORM_data1, RVer);
 
-  assert(!ModuleCU &&
-         "ModuleCU assigned since the top of constructCompileUnit");
-  ModuleCU = new CompileUnit(ID, Die);
+  CompileUnit *NewCU = new CompileUnit(ID, Die);
+  if (!FirstCU)
+    FirstCU = NewCU;
+  CUMap.insert(std::make_pair(N, NewCU));
+}
+
+/// getCompielUnit - Get CompileUnit DIE.
+CompileUnit *DwarfDebug::getCompileUnit(const MDNode *N) const {
+  assert (N && "Invalid DwarfDebug::getCompileUnit argument!");
+  DIDescriptor D(N);
+  const MDNode *CUNode = NULL;
+  if (D.isCompileUnit())
+    CUNode = N;
+  else if (D.isSubprogram())
+    CUNode = DISubprogram(N).getCompileUnit();
+  else if (D.isType())
+    CUNode = DIType(N).getCompileUnit();
+  else if (D.isGlobalVariable())
+    CUNode = DIGlobalVariable(N).getCompileUnit();
+  else if (D.isVariable())
+    CUNode = DIVariable(N).getCompileUnit();
+  else if (D.isNameSpace())
+    CUNode = DINameSpace(N).getCompileUnit();
+  else if (D.isFile())
+    CUNode = DIFile(N).getCompileUnit();
+  else
+    return FirstCU;
+
+  DenseMap<const MDNode *, CompileUnit *>::const_iterator I
+    = CUMap.find(CUNode);
+  if (I == CUMap.end())
+    return FirstCU;
+  return I->second;
 }
 
-void DwarfDebug::constructGlobalVariableDIE(MDNode *N) {
+
+/// constructGlobalVariableDIE - Construct global variable DIE.
+void DwarfDebug::constructGlobalVariableDIE(const MDNode *N) {
   DIGlobalVariable DI_GV(N);
 
   // If debug information is malformed then ignore it.
@@ -1807,7 +1847,8 @@ void DwarfDebug::constructGlobalVariableDIE(MDNode *N) {
     return;
 
   // Check for pre-existence.
-  if (ModuleCU->getDIE(DI_GV.getNode()))
+  CompileUnit *TheCU = getCompileUnit(N);
+  if (TheCU->getDIE(DI_GV))
     return;
 
   DIE *VariableDie = createGlobalVariableDIE(DI_GV);
@@ -1815,7 +1856,7 @@ void DwarfDebug::constructGlobalVariableDIE(MDNode *N) {
     return;
 
   // Add to map.
-  ModuleCU->insertDIE(N, VariableDie);
+  TheCU->insertDIE(N, VariableDie);
 
   // Add to context owner.
   DIDescriptor GVContext = DI_GV.getContext();
@@ -1823,7 +1864,7 @@ void DwarfDebug::constructGlobalVariableDIE(MDNode *N) {
   // or a subprogram.
   if (DI_GV.isDefinition() && !GVContext.isCompileUnit() &&
       !GVContext.isFile() && 
-      !isSubprogramContext(GVContext.getNode())) {
+      !isSubprogramContext(GVContext)) {
     // Create specification DIE.
     DIE *VariableSpecDIE = new DIE(dwarf::DW_TAG_variable);
     addDIEEntry(VariableSpecDIE, dwarf::DW_AT_specification,
@@ -1834,7 +1875,7 @@ void DwarfDebug::constructGlobalVariableDIE(MDNode *N) {
              Asm->Mang->getSymbol(DI_GV.getGlobal()));
     addBlock(VariableSpecDIE, dwarf::DW_AT_location, 0, Block);
     addUInt(VariableDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
-    ModuleCU->addDie(VariableSpecDIE);
+    TheCU->addDie(VariableSpecDIE);
   } else {
     DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
     addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr);
@@ -1845,23 +1886,25 @@ void DwarfDebug::constructGlobalVariableDIE(MDNode *N) {
   addToContextOwner(VariableDie, GVContext);
   
   // Expose as global. FIXME - need to check external flag.
-  ModuleCU->addGlobal(DI_GV.getName(), VariableDie);
+  TheCU->addGlobal(DI_GV.getName(), VariableDie);
 
   DIType GTy = DI_GV.getType();
   if (GTy.isCompositeType() && !GTy.getName().empty()
       && !GTy.isForwardDecl()) {
-    DIEEntry *Entry = ModuleCU->getDIEEntry(GTy.getNode());
+    DIEEntry *Entry = TheCU->getDIEEntry(GTy);
     assert(Entry && "Missing global type!");
-    ModuleCU->addGlobalType(GTy.getName(), Entry->getEntry());
+    TheCU->addGlobalType(GTy.getName(), Entry->getEntry());
   }
   return;
 }
 
-void DwarfDebug::constructSubprogramDIE(MDNode *N) {
+/// construct SubprogramDIE - Construct subprogram DIE.
+void DwarfDebug::constructSubprogramDIE(const MDNode *N) {
   DISubprogram SP(N);
 
   // Check for pre-existence.
-  if (ModuleCU->getDIE(N))
+  CompileUnit *TheCU = getCompileUnit(N);
+  if (TheCU->getDIE(N))
     return;
 
   if (!SP.isDefinition())
@@ -1872,13 +1915,13 @@ void DwarfDebug::constructSubprogramDIE(MDNode *N) {
   DIE *SubprogramDie = createSubprogramDIE(SP);
 
   // Add to map.
-  ModuleCU->insertDIE(N, SubprogramDie);
+  TheCU->insertDIE(N, SubprogramDie);
 
   // Add to context owner.
   addToContextOwner(SubprogramDie, SP.getContext());
 
   // Expose as global.
-  ModuleCU->addGlobal(SP.getName(), SubprogramDie);
+  TheCU->addGlobal(SP.getName(), SubprogramDie);
 
   return;
 }
@@ -1952,7 +1995,7 @@ void DwarfDebug::beginModule(Module *M) {
 /// endModule - Emit all Dwarf sections that should come after the content.
 ///
 void DwarfDebug::endModule() {
-  if (!ModuleCU) return;
+  if (!FirstCU) return;
 
   // Attach DW_AT_inline attribute with inlined subprogram DIEs.
   for (SmallPtrSet<DIE *, 4>::iterator AI = InlinedSubprogramDIEs.begin(),
@@ -1961,12 +2004,12 @@ void DwarfDebug::endModule() {
     addUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined);
   }
 
-  for (DenseMap<DIE *, MDNode *>::iterator CI = ContainingTypeMap.begin(),
+  for (DenseMap<DIE *, const MDNode *>::iterator CI = ContainingTypeMap.begin(),
          CE = ContainingTypeMap.end(); CI != CE; ++CI) {
     DIE *SPDie = CI->first;
-    MDNode *N = dyn_cast_or_null<MDNode>(CI->second);
+    const MDNode *N = dyn_cast_or_null<MDNode>(CI->second);
     if (!N) continue;
-    DIE *NDie = ModuleCU->getDIE(N);
+    DIE *NDie = getCompileUnit(N)->getDIE(N);
     if (!NDie) continue;
     addDIEEntry(SPDie, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, NDie);
   }
@@ -2027,68 +2070,48 @@ void DwarfDebug::endModule() {
   // Emit info into a debug str section.
   emitDebugStr();
   
-  delete ModuleCU;
-  ModuleCU = NULL;  // Reset for the next Module, if any.
+  for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
+         E = CUMap.end(); I != E; ++I)
+    delete I->second;
+  FirstCU = NULL;  // Reset for the next Module, if any.
 }
 
 /// findAbstractVariable - Find abstract variable, if any, associated with Var.
-DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &Var,
-                                              unsigned FrameIdx,
+DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &Var, 
                                               DebugLoc ScopeLoc) {
 
-  DbgVariable *AbsDbgVariable = AbstractVariables.lookup(Var.getNode());
+  DbgVariable *AbsDbgVariable = AbstractVariables.lookup(Var);
   if (AbsDbgVariable)
     return AbsDbgVariable;
 
-  LLVMContext &Ctx = Var.getNode()->getContext();
+  LLVMContext &Ctx = Var->getContext();
   DbgScope *Scope = AbstractScopes.lookup(ScopeLoc.getScope(Ctx));
   if (!Scope)
     return NULL;
 
-  AbsDbgVariable = new DbgVariable(Var, FrameIdx,
-                                   NULL /* No more-abstract variable*/);
+  AbsDbgVariable = new DbgVariable(Var);
   Scope->addVariable(AbsDbgVariable);
-  AbstractVariables[Var.getNode()] = AbsDbgVariable;
+  AbstractVariables[Var] = AbsDbgVariable;
   return AbsDbgVariable;
 }
 
-/// findAbstractVariable - Find abstract variable, if any, associated with Var.
-/// FIXME : Refactor findAbstractVariable.
-DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &Var,
-                                              const MachineInstr *MI,
-                                              DebugLoc ScopeLoc) {
-
-  DbgVariable *AbsDbgVariable = AbstractVariables.lookup(Var.getNode());
-  if (AbsDbgVariable)
-    return AbsDbgVariable;
-
-  LLVMContext &Ctx = Var.getNode()->getContext();
-  DbgScope *Scope = AbstractScopes.lookup(ScopeLoc.getScope(Ctx));
-  if (!Scope)
-    return NULL;
-
-  AbsDbgVariable = new DbgVariable(Var, MI,
-                                   NULL /* No more-abstract variable*/);
-  Scope->addVariable(AbsDbgVariable);
-  AbstractVariables[Var.getNode()] = AbsDbgVariable;
-  DbgValueStartMap[MI] = AbsDbgVariable;
-  return AbsDbgVariable;
-}
-
-/// collectVariableInfo - Populate DbgScope entries with variables' info.
-void DwarfDebug::collectVariableInfo() {
+/// collectVariableInfoFromMMITable - Collect variable information from
+/// side table maintained by MMI.
+void 
+DwarfDebug::collectVariableInfoFromMMITable(const MachineFunction * MF,
+                                   SmallPtrSet<const MDNode *, 16> &Processed) {
   const LLVMContext &Ctx = Asm->MF->getFunction()->getContext();
-
   MachineModuleInfo::VariableDbgInfoMapTy &VMap = MMI->getVariableDbgInfo();
   for (MachineModuleInfo::VariableDbgInfoMapTy::iterator VI = VMap.begin(),
          VE = VMap.end(); VI != VE; ++VI) {
-    MDNode *Var = VI->first;
+    const MDNode *Var = VI->first;
     if (!Var) continue;
+    Processed.insert(Var);
     DIVariable DV(Var);
     const std::pair<unsigned, DebugLoc> &VP = VI->second;
 
     DbgScope *Scope = 0;
-    if (MDNode *IA = VP.second.getInlinedAt(Ctx))
+    if (const MDNode *IA = VP.second.getInlinedAt(Ctx))
       Scope = ConcreteScopes.lookup(IA);
     if (Scope == 0)
       Scope = DbgScopeMap.lookup(VP.second.getScope(Ctx));
@@ -2097,100 +2120,192 @@ void DwarfDebug::collectVariableInfo() {
     if (Scope == 0)
       continue;
 
-    DbgVariable *AbsDbgVariable = findAbstractVariable(DV, VP.first, VP.second);
-    DbgVariable *RegVar = new DbgVariable(DV, VP.first, AbsDbgVariable);
+    DbgVariable *AbsDbgVariable = findAbstractVariable(DV, VP.second);
+    DbgVariable *RegVar = new DbgVariable(DV);
+    recordVariableFrameIndex(RegVar, VP.first);
     Scope->addVariable(RegVar);
+    if (AbsDbgVariable) {
+      recordVariableFrameIndex(AbsDbgVariable, VP.first);
+      VarToAbstractVarMap[RegVar] = AbsDbgVariable;
+    }
   }
+}
 
+/// isDbgValueInUndefinedReg - Return true if debug value, encoded by 
+/// DBG_VALUE instruction, is in undefined reg.
+static bool isDbgValueInUndefinedReg(const MachineInstr *MI) {
+  assert (MI->isDebugValue() && "Invalid DBG_VALUE machine instruction!");
+  if (MI->getOperand(0).isReg() && !MI->getOperand(0).getReg())
+    return true;
+  return false;
+}
+
+/// isDbgValueInDefinedReg - Return true if debug value, encoded by 
+/// DBG_VALUE instruction, is in a defined reg.
+static bool isDbgValueInDefinedReg(const MachineInstr *MI) {
+  assert (MI->isDebugValue() && "Invalid DBG_VALUE machine instruction!");
+  if (MI->getOperand(0).isReg() && MI->getOperand(0).getReg())
+    return true;
+  return false;
+}
+
+/// collectVariableInfo - Populate DbgScope entries with variables' info.
+void DwarfDebug::collectVariableInfo(const MachineFunction *MF) {
+  SmallPtrSet<const MDNode *, 16> Processed;
+  
+  /// collection info from MMI table.
+  collectVariableInfoFromMMITable(MF, Processed);
+
+  SmallVector<const MachineInstr *, 8> DbgValues;
   // Collect variable information from DBG_VALUE machine instructions;
   for (MachineFunction::const_iterator I = Asm->MF->begin(), E = Asm->MF->end();
-       I != E; ++I) {
+       I != E; ++I)
     for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
          II != IE; ++II) {
       const MachineInstr *MInsn = II;
-      if (!MInsn->isDebugValue())
+      if (!MInsn->isDebugValue() || isDbgValueInUndefinedReg(MInsn))
         continue;
+      DbgValues.push_back(MInsn);
+    }
 
-      // Ignore Undef values.
-      if (MInsn->getOperand(0).isReg() && !MInsn->getOperand(0).getReg())
-        continue;
+  // This is a collection of DBV_VALUE instructions describing same variable.
+  SmallVector<const MachineInstr *, 4> MultipleValues;
+  for(SmallVector<const MachineInstr *, 8>::iterator I = DbgValues.begin(),
+        E = DbgValues.end(); I != E; ++I) {
+    const MachineInstr *MInsn = *I;
+    MultipleValues.clear();
+    if (isDbgValueInDefinedReg(MInsn))
+      MultipleValues.push_back(MInsn);
+    DIVariable DV(MInsn->getOperand(MInsn->getNumOperands() - 1).getMetadata());
+    if (Processed.count(DV) != 0)
+      continue;
+
+    for (SmallVector<const MachineInstr *, 8>::iterator MI = I+1, 
+           ME = DbgValues.end(); MI != ME; ++MI) {
+      const MDNode *Var = 
+        (*MI)->getOperand((*MI)->getNumOperands()-1).getMetadata();
+      if (Var == DV && isDbgValueInDefinedReg(*MI))
+        MultipleValues.push_back(*MI);
+    }
+
+    DbgScope *Scope = findDbgScope(MInsn);
+    if (!Scope && DV.getTag() == dwarf::DW_TAG_arg_variable)
+      Scope = CurrentFnDbgScope;
+    // If variable scope is not found then skip this variable.
+    if (!Scope)
+      continue;
 
-      DIVariable DV(
-        const_cast<MDNode *>(MInsn->getOperand(MInsn->getNumOperands() - 1)
-                               .getMetadata()));
-      if (DV.getTag() == dwarf::DW_TAG_arg_variable)  {
-        // FIXME Handle inlined subroutine arguments.
-        DbgVariable *ArgVar = new DbgVariable(DV, MInsn, NULL);
-        CurrentFnDbgScope->addVariable(ArgVar);
-        DbgValueStartMap[MInsn] = ArgVar;
+    Processed.insert(DV);
+    DbgVariable *RegVar = new DbgVariable(DV);
+    Scope->addVariable(RegVar);
+    if (DV.getTag() != dwarf::DW_TAG_arg_variable)
+      DbgVariableLabelsMap[RegVar] = getLabelBeforeInsn(MInsn); 
+    if (DbgVariable *AbsVar = findAbstractVariable(DV, MInsn->getDebugLoc())) {
+      DbgVariableToDbgInstMap[AbsVar] = MInsn;
+      VarToAbstractVarMap[RegVar] = AbsVar;
+    }
+    if (MultipleValues.size() <= 1) {
+      DbgVariableToDbgInstMap[RegVar] = MInsn;
+      continue;
+    }
+
+    // handle multiple DBG_VALUE instructions describing one variable.
+    if (DotDebugLocEntries.empty())
+      RegVar->setDotDebugLocOffset(0);
+    else
+      RegVar->setDotDebugLocOffset(DotDebugLocEntries.size());
+    const MachineInstr *Begin = NULL;
+    const MachineInstr *End = NULL;
+    for (SmallVector<const MachineInstr *, 4>::iterator 
+           MVI = MultipleValues.begin(), MVE = MultipleValues.end(); MVI != MVE; ++MVI) {
+      if (!Begin) {
+        Begin = *MVI;
         continue;
+      } 
+      End = *MVI;
+      MachineLocation MLoc;
+      MLoc.set(Begin->getOperand(0).getReg(), 0);
+      const MCSymbol *FLabel = getLabelBeforeInsn(Begin);
+      const MCSymbol *SLabel = getLabelBeforeInsn(End);
+      DotDebugLocEntries.push_back(DotDebugLocEntry(FLabel, SLabel, MLoc));
+      Begin = End;
+      if (MVI + 1 == MVE) {
+        // If End is the last instruction then its value is valid
+        // until the end of the funtion.
+        MLoc.set(End->getOperand(0).getReg(), 0);
+        DotDebugLocEntries.
+          push_back(DotDebugLocEntry(SLabel, FunctionEndSym, MLoc));
       }
+    }
+    DotDebugLocEntries.push_back(DotDebugLocEntry());
+  }
 
-      DebugLoc DL = MInsn->getDebugLoc();
-      if (DL.isUnknown()) continue;
-      DbgScope *Scope = 0;
-      if (MDNode *IA = DL.getInlinedAt(Ctx))
-        Scope = ConcreteScopes.lookup(IA);
-      if (Scope == 0)
-        Scope = DbgScopeMap.lookup(DL.getScope(Ctx));
-      
-      // If variable scope is not found then skip this variable.
-      if (Scope == 0)
+  // Collect info for variables that were optimized out.
+  if (NamedMDNode *NMD = 
+      MF->getFunction()->getParent()->getNamedMetadata("llvm.dbg.lv")) {
+    for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
+      DIVariable DV(cast_or_null<MDNode>(NMD->getOperand(i)));
+      if (!DV || !Processed.insert(DV))
         continue;
-
-      DbgVariable *AbsDbgVariable = findAbstractVariable(DV, MInsn, DL);
-      DbgVariable *RegVar = new DbgVariable(DV, MInsn, AbsDbgVariable);
-      DbgValueStartMap[MInsn] = RegVar;
-      Scope->addVariable(RegVar);
+      DbgScope *Scope = DbgScopeMap.lookup(DV.getContext());
+      if (Scope)
+        Scope->addVariable(new DbgVariable(DV));
     }
   }
 }
 
+/// getLabelBeforeInsn - Return Label preceding the instruction.
+const MCSymbol *DwarfDebug::getLabelBeforeInsn(const MachineInstr *MI) {
+  DenseMap<const MachineInstr *, MCSymbol *>::iterator I =
+    LabelsBeforeInsn.find(MI);
+  if (I == LabelsBeforeInsn.end())
+    // FunctionBeginSym always preceeds all the instruction in current function.
+    return FunctionBeginSym;
+  return I->second;
+}
+
+/// getLabelAfterInsn - Return Label immediately following the instruction.
+const MCSymbol *DwarfDebug::getLabelAfterInsn(const MachineInstr *MI) {
+  DenseMap<const MachineInstr *, MCSymbol *>::iterator I =
+    LabelsAfterInsn.find(MI);
+  if (I == LabelsAfterInsn.end())
+    return NULL;
+  return I->second;
+}
+
 /// beginScope - Process beginning of a scope.
 void DwarfDebug::beginScope(const MachineInstr *MI) {
-  // Check location.
-  DebugLoc DL = MI->getDebugLoc();
-  if (DL.isUnknown())
+  if (InsnNeedsLabel.count(MI) == 0) {
+    LabelsBeforeInsn[MI] = PrevLabel;
     return;
+  }
 
-  MDNode *Scope = DL.getScope(Asm->MF->getFunction()->getContext());
-  
-  // FIXME: Should only verify each scope once!
-  if (!DIScope(Scope).Verify())
+  // Check location.
+  DebugLoc DL = MI->getDebugLoc();
+  if (!DL.isUnknown()) {
+    const MDNode *Scope = DL.getScope(Asm->MF->getFunction()->getContext());
+    PrevLabel = recordSourceLine(DL.getLine(), DL.getCol(), Scope);
+    PrevInstLoc = DL;
+    LabelsBeforeInsn[MI] = PrevLabel;
     return;
+  }
 
-  // DBG_VALUE instruction establishes new value.
+  // If location is unknown then use temp label for this DBG_VALUE 
+  // instruction.
   if (MI->isDebugValue()) {
-    DenseMap<const MachineInstr *, DbgVariable *>::iterator DI
-      = DbgValueStartMap.find(MI);
-    if (DI != DbgValueStartMap.end()) {
-      MCSymbol *Label = NULL;
-      if (DL == PrevInstLoc)
-        Label = PrevLabel;
-      else {
-        Label = recordSourceLine(DL.getLine(), DL.getCol(), Scope);
-        PrevInstLoc = DL;
-        PrevLabel = Label;
-      }
-      DI->second->setDbgValueLabel(Label);
-    }
+    PrevLabel = MMI->getContext().CreateTempSymbol();
+    Asm->OutStreamer.EmitLabel(PrevLabel);
+    LabelsBeforeInsn[MI] = PrevLabel;
     return;
   }
 
-  // Emit a label to indicate location change. This is used for line 
-  // table even if this instruction does not start a new scope.
-  MCSymbol *Label = NULL;
-  if (DL == PrevInstLoc)
-    Label = PrevLabel;
-  else {
-    Label = recordSourceLine(DL.getLine(), DL.getCol(), Scope);
-    PrevInstLoc = DL;
-    PrevLabel = Label;
+  if (UnknownLocations) {
+    PrevLabel = recordSourceLine(0, 0, 0);
+    LabelsBeforeInsn[MI] = PrevLabel;
+    return;
   }
 
-  // If this instruction begins a scope then note down corresponding label.
-  if (InsnsBeginScopeSet.count(MI) != 0)
-    LabelsBeforeInsn[MI] = Label;
+  assert (0 && "Instruction is not processed!");
 }
 
 /// endScope - Process end of a scope.
@@ -2204,7 +2319,7 @@ void DwarfDebug::endScope(const MachineInstr *MI) {
 }
 
 /// getOrCreateDbgScope - Create DbgScope for the scope.
-DbgScope *DwarfDebug::getOrCreateDbgScope(MDNode *Scope, MDNode *InlinedAt) {
+DbgScope *DwarfDebug::getOrCreateDbgScope(const MDNode *Scope, const MDNode *InlinedAt) {
   if (!InlinedAt) {
     DbgScope *WScope = DbgScopeMap.lookup(Scope);
     if (WScope)
@@ -2213,7 +2328,7 @@ DbgScope *DwarfDebug::getOrCreateDbgScope(MDNode *Scope, MDNode *InlinedAt) {
     DbgScopeMap.insert(std::make_pair(Scope, WScope));
     if (DIDescriptor(Scope).isLexicalBlock()) {
       DbgScope *Parent = 
-        getOrCreateDbgScope(DILexicalBlock(Scope).getContext().getNode(), NULL);
+        getOrCreateDbgScope(DILexicalBlock(Scope).getContext(), NULL);
       WScope->setParent(Parent);
       Parent->addScope(WScope);
     }
@@ -2235,7 +2350,7 @@ DbgScope *DwarfDebug::getOrCreateDbgScope(MDNode *Scope, MDNode *InlinedAt) {
   DbgScopeMap.insert(std::make_pair(InlinedAt, WScope));
   DILocation DL(InlinedAt);
   DbgScope *Parent =
-    getOrCreateDbgScope(DL.getScope().getNode(), DL.getOrigLocation().getNode());
+    getOrCreateDbgScope(DL.getScope(), DL.getOrigLocation());
   WScope->setParent(Parent);
   Parent->addScope(WScope);
 
@@ -2249,13 +2364,13 @@ DbgScope *DwarfDebug::getOrCreateDbgScope(MDNode *Scope, MDNode *InlinedAt) {
 /// machine instruction encodes valid location info.
 static bool hasValidLocation(LLVMContext &Ctx,
                              const MachineInstr *MInsn,
-                             MDNode *&Scope, MDNode *&InlinedAt) {
+                             const MDNode *&Scope, const MDNode *&InlinedAt) {
   if (MInsn->isDebugValue())
     return false;
   DebugLoc DL = MInsn->getDebugLoc();
   if (DL.isUnknown()) return false;
       
-  MDNode *S = DL.getScope(Ctx);
+  const MDNode *S = DL.getScope(Ctx);
   
   // There is no need to create another DIE for compile unit. For all
   // other scopes, create one DbgScope now. This will be translated
@@ -2307,8 +2422,8 @@ void printDbgScopeInfo(LLVMContext &Ctx, const MachineFunction *MF,
     for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
          II != IE; ++II) {
       const MachineInstr *MInsn = II;
-      MDNode *Scope = NULL;
-      MDNode *InlinedAt = NULL;
+      const MDNode *Scope = NULL;
+      const MDNode *InlinedAt = NULL;
 
       // Check if instruction has valid location information.
       if (hasValidLocation(Ctx, MInsn, Scope, InlinedAt)) {
@@ -2344,8 +2459,8 @@ bool DwarfDebug::extractScopeInformation() {
   LLVMContext &Ctx = Asm->MF->getFunction()->getContext();
   SmallVector<DbgRange, 4> MIRanges;
   DenseMap<const MachineInstr *, DbgScope *> MI2ScopeMap;
-  MDNode *PrevScope = NULL;
-  MDNode *PrevInlinedAt = NULL;
+  const MDNode *PrevScope = NULL;
+  const MDNode *PrevInlinedAt = NULL;
   const MachineInstr *RangeBeginMI = NULL;
   const MachineInstr *PrevMI = NULL;
   for (MachineFunction::const_iterator I = Asm->MF->begin(), E = Asm->MF->end();
@@ -2353,8 +2468,8 @@ bool DwarfDebug::extractScopeInformation() {
     for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
          II != IE; ++II) {
       const MachineInstr *MInsn = II;
-      MDNode *Scope = NULL;
-      MDNode *InlinedAt = NULL;
+      const MDNode *Scope = NULL;
+      const MDNode *InlinedAt = NULL;
 
       // Check if instruction has valid location information.
       if (!hasValidLocation(Ctx, MInsn, Scope, InlinedAt)) {
@@ -2476,8 +2591,6 @@ static DebugLoc FindFirstDebugLoc(const MachineFunction *MF) {
 void DwarfDebug::beginFunction(const MachineFunction *MF) {
   if (!MMI->hasDebugInfo()) return;
   if (!extractScopeInformation()) return;
-  
-  collectVariableInfo();
 
   FunctionBeginSym = Asm->GetTempSymbol("func_begin",
                                         Asm->getFunctionNumber());
@@ -2489,7 +2602,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
   DebugLoc FDL = FindFirstDebugLoc(MF);
   if (FDL.isUnknown()) return;
   
-  MDNode *Scope = FDL.getScope(MF->getFunction()->getContext());
+  const MDNode *Scope = FDL.getScope(MF->getFunction()->getContext());
   
   DISubprogram SP = getDISubprogram(Scope);
   unsigned Line, Col;
@@ -2502,6 +2615,40 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
   }
   
   recordSourceLine(Line, Col, Scope);
+
+  DebugLoc PrevLoc;
+  for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
+       I != E; ++I)
+    for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
+         II != IE; ++II) {
+      const MachineInstr *MI = II;
+      DebugLoc DL = MI->getDebugLoc();
+      if (MI->isDebugValue()) {
+        // DBG_VALUE needs a label if the variable is local variable or
+        // an argument whose location is changing.
+        assert (MI->getNumOperands() > 1 && "Invalid machine instruction!");
+        DIVariable DV(MI->getOperand(MI->getNumOperands() - 1).getMetadata());
+        if (!DV.Verify()) continue;
+        if (DV.getTag() != dwarf::DW_TAG_arg_variable)
+          InsnNeedsLabel.insert(MI);
+        else if (!ProcessedArgs.insert(DV))
+          InsnNeedsLabel.insert(MI);
+      } else {
+        // If location is unknown then instruction needs a location only if 
+        // UnknownLocations flag is set.
+        if (DL.isUnknown()) {
+          if (UnknownLocations && !PrevLoc.isUnknown())
+            InsnNeedsLabel.insert(MI);
+        } else if (DL != PrevLoc)
+          // Otherwise, instruction needs a location only if it is new location.
+          InsnNeedsLabel.insert(MI);
+      }
+      
+      if (!DL.isUnknown() || UnknownLocations)
+        PrevLoc = DL;
+    }
+
+  PrevLabel = FunctionBeginSym;
 }
 
 /// endFunction - Gather and emit post-function debug information.
@@ -2510,10 +2657,15 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
   if (!MMI->hasDebugInfo() || DbgScopeMap.empty()) return;
 
   if (CurrentFnDbgScope) {
+
     // Define end label for subprogram.
-    Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("func_end",
-                                                  Asm->getFunctionNumber()));
+    FunctionEndSym = Asm->GetTempSymbol("func_end",
+                                        Asm->getFunctionNumber());
+    // Assumes in correct section after the entry point.
+    Asm->OutStreamer.EmitLabel(FunctionEndSym);
     
+    collectVariableInfo(MF);
+
     // Get function line info.
     if (!Lines.empty()) {
       // Get section line info.
@@ -2543,10 +2695,15 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
 
   // Clear debug info
   CurrentFnDbgScope = NULL;
+  InsnNeedsLabel.clear();
+  ProcessedArgs.clear();
+  DbgVariableToFrameIndexMap.clear();
+  VarToAbstractVarMap.clear();
+  DbgVariableToDbgInstMap.clear();
+  DbgVariableLabelsMap.clear();
   DeleteContainerSeconds(DbgScopeMap);
   InsnsBeginScopeSet.clear();
   InsnsEndScopeSet.clear();
-  DbgValueStartMap.clear();
   ConcreteScopes.clear();
   DeleteContainerSeconds(AbstractScopes);
   AbstractScopesList.clear();
@@ -2557,30 +2714,82 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
   PrevLabel = NULL;
 }
 
+/// recordVariableFrameIndex - Record a variable's index.
+void DwarfDebug::recordVariableFrameIndex(const DbgVariable *V, int Index) {
+  assert (V && "Invalid DbgVariable!");
+  DbgVariableToFrameIndexMap[V] = Index;
+}
+
+/// findVariableFrameIndex - Return true if frame index for the variable
+/// is found. Update FI to hold value of the index.
+bool DwarfDebug::findVariableFrameIndex(const DbgVariable *V, int *FI) {
+  assert (V && "Invalid DbgVariable!");
+  DenseMap<const DbgVariable *, int>::iterator I =
+    DbgVariableToFrameIndexMap.find(V);
+  if (I == DbgVariableToFrameIndexMap.end())
+    return false;
+  *FI = I->second;
+  return true;
+}
+
+/// findVariableLabel - Find MCSymbol for the variable.
+const MCSymbol *DwarfDebug::findVariableLabel(const DbgVariable *V) {
+  DenseMap<const DbgVariable *, const MCSymbol *>::iterator I
+    = DbgVariableLabelsMap.find(V);
+  if (I == DbgVariableLabelsMap.end())
+    return NULL;
+  else return I->second;
+}
+
+/// findDbgScope - Find DbgScope for the debug loc attached with an 
+/// instruction.
+DbgScope *DwarfDebug::findDbgScope(const MachineInstr *MInsn) {
+  DbgScope *Scope = NULL;
+  LLVMContext &Ctx = 
+    MInsn->getParent()->getParent()->getFunction()->getContext();
+  DebugLoc DL = MInsn->getDebugLoc();
+
+  if (DL.isUnknown()) 
+    return Scope;
+
+  if (const MDNode *IA = DL.getInlinedAt(Ctx))
+    Scope = ConcreteScopes.lookup(IA);
+  if (Scope == 0)
+    Scope = DbgScopeMap.lookup(DL.getScope(Ctx));
+    
+  return Scope;
+}
+
+
 /// recordSourceLine - Register a source line with debug info. Returns the
 /// unique label that was emitted and which provides correspondence to
 /// the source line list.
-MCSymbol *DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, MDNode *S) {
+MCSymbol *DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S) {
   StringRef Dir;
   StringRef Fn;
 
-  DIDescriptor Scope(S);
-  if (Scope.isCompileUnit()) {
-    DICompileUnit CU(S);
-    Dir = CU.getDirectory();
-    Fn = CU.getFilename();
-  } else if (Scope.isSubprogram()) {
-    DISubprogram SP(S);
-    Dir = SP.getDirectory();
-    Fn = SP.getFilename();
-  } else if (Scope.isLexicalBlock()) {
-    DILexicalBlock DB(S);
-    Dir = DB.getDirectory();
-    Fn = DB.getFilename();
-  } else
-    assert(0 && "Unexpected scope info");
+  unsigned Src = 1;
+  if (S) {
+    DIDescriptor Scope(S);
+
+    if (Scope.isCompileUnit()) {
+      DICompileUnit CU(S);
+      Dir = CU.getDirectory();
+      Fn = CU.getFilename();
+    } else if (Scope.isSubprogram()) {
+      DISubprogram SP(S);
+      Dir = SP.getDirectory();
+      Fn = SP.getFilename();
+    } else if (Scope.isLexicalBlock()) {
+      DILexicalBlock DB(S);
+      Dir = DB.getDirectory();
+      Fn = DB.getFilename();
+    } else
+      assert(0 && "Unexpected scope info");
+
+    Src = GetOrCreateSourceID(Dir, Fn);
+  }
 
-  unsigned Src = GetOrCreateSourceID(Dir, Fn);
   MCSymbol *Label = MMI->getContext().CreateTempSymbol();
   Lines.push_back(SrcLineInfo(Line, Col, Src, Label));
 
@@ -2643,14 +2852,18 @@ DwarfDebug::computeSizeAndOffset(DIE *Die, unsigned Offset, bool Last) {
 /// computeSizeAndOffsets - Compute the size and offset of all the DIEs.
 ///
 void DwarfDebug::computeSizeAndOffsets() {
-  // Compute size of compile unit header.
-  static unsigned Offset =
-    sizeof(int32_t) + // Length of Compilation Unit Info
-    sizeof(int16_t) + // DWARF version number
-    sizeof(int32_t) + // Offset Into Abbrev. Section
-    sizeof(int8_t);   // Pointer Size (in bytes)
-
-  computeSizeAndOffset(ModuleCU->getCUDie(), Offset, true);
+  unsigned PrevOffset = 0;
+  for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
+         E = CUMap.end(); I != E; ++I) {
+    // Compute size of compile unit header.
+    static unsigned Offset = PrevOffset +
+      sizeof(int32_t) + // Length of Compilation Unit Info
+      sizeof(int16_t) + // DWARF version number
+      sizeof(int32_t) + // Offset Into Abbrev. Section
+      sizeof(int8_t);   // Pointer Size (in bytes)
+    computeSizeAndOffset(I->second->getCUDie(), Offset, true);
+    PrevOffset = Offset;
+  }
 }
 
 /// EmitSectionSym - Switch to the specified MCSection and emit an assembler
@@ -2694,6 +2907,9 @@ void DwarfDebug::EmitSectionLabels() {
   DwarfDebugRangeSectionSym = EmitSectionSym(Asm, TLOF.getDwarfRangesSection(),
                                              "debug_range");
 
+  DwarfDebugLocSectionSym = EmitSectionSym(Asm, TLOF.getDwarfLocSection(),
+                                           "section_debug_loc");
+
   TextSectionSym = EmitSectionSym(Asm, TLOF.getTextSection(), "text_begin");
   EmitSectionSym(Asm, TLOF.getDataSection());
 }
@@ -2745,6 +2961,14 @@ void DwarfDebug::emitDIE(DIE *Die) {
                                      4);
       break;
     }
+    case dwarf::DW_AT_location: {
+      if (UseDotDebugLocEntry.count(Die) != 0) {
+        DIELabel *L = cast<DIELabel>(Values[i]);
+        Asm->EmitLabelDifference(L->getValue(), DwarfDebugLocSectionSym, 4);
+      } else
+        Values[i]->EmitValue(Asm, Form);
+      break;
+    }
     default:
       // Emit an attribute using the defined form.
       Values[i]->EmitValue(Asm, Form);
@@ -2771,37 +2995,41 @@ void DwarfDebug::emitDebugInfo() {
   // Start debug info section.
   Asm->OutStreamer.SwitchSection(
                             Asm->getObjFileLowering().getDwarfInfoSection());
-  DIE *Die = ModuleCU->getCUDie();
-
-  // Emit the compile units header.
-  Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("info_begin",
-                                                ModuleCU->getID()));
-
-  // Emit size of content not including length itself
-  unsigned ContentSize = Die->getSize() +
-    sizeof(int16_t) + // DWARF version number
-    sizeof(int32_t) + // Offset Into Abbrev. Section
-    sizeof(int8_t) +  // Pointer Size (in bytes)
-    sizeof(int32_t);  // FIXME - extra pad for gdb bug.
-
-  Asm->OutStreamer.AddComment("Length of Compilation Unit Info");
-  Asm->EmitInt32(ContentSize);
-  Asm->OutStreamer.AddComment("DWARF version number");
-  Asm->EmitInt16(dwarf::DWARF_VERSION);
-  Asm->OutStreamer.AddComment("Offset Into Abbrev. Section");
-  Asm->EmitSectionOffset(Asm->GetTempSymbol("abbrev_begin"),
-                         DwarfAbbrevSectionSym);
-  Asm->OutStreamer.AddComment("Address Size (in bytes)");
-  Asm->EmitInt8(Asm->getTargetData().getPointerSize());
-
-  emitDIE(Die);
-  // FIXME - extra padding for gdb bug.
-  Asm->OutStreamer.AddComment("4 extra padding bytes for GDB");
-  Asm->EmitInt8(0);
-  Asm->EmitInt8(0);
-  Asm->EmitInt8(0);
-  Asm->EmitInt8(0);
-  Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("info_end", ModuleCU->getID()));
+  for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
+         E = CUMap.end(); I != E; ++I) {
+    CompileUnit *TheCU = I->second;
+    DIE *Die = TheCU->getCUDie();
+    
+    // Emit the compile units header.
+    Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("info_begin",
+                                                  TheCU->getID()));
+    
+    // Emit size of content not including length itself
+    unsigned ContentSize = Die->getSize() +
+      sizeof(int16_t) + // DWARF version number
+      sizeof(int32_t) + // Offset Into Abbrev. Section
+      sizeof(int8_t) +  // Pointer Size (in bytes)
+      sizeof(int32_t);  // FIXME - extra pad for gdb bug.
+    
+    Asm->OutStreamer.AddComment("Length of Compilation Unit Info");
+    Asm->EmitInt32(ContentSize);
+    Asm->OutStreamer.AddComment("DWARF version number");
+    Asm->EmitInt16(dwarf::DWARF_VERSION);
+    Asm->OutStreamer.AddComment("Offset Into Abbrev. Section");
+    Asm->EmitSectionOffset(Asm->GetTempSymbol("abbrev_begin"),
+                           DwarfAbbrevSectionSym);
+    Asm->OutStreamer.AddComment("Address Size (in bytes)");
+    Asm->EmitInt8(Asm->getTargetData().getPointerSize());
+    
+    emitDIE(Die);
+    // FIXME - extra padding for gdb bug.
+    Asm->OutStreamer.AddComment("4 extra padding bytes for GDB");
+    Asm->EmitInt8(0);
+    Asm->EmitInt8(0);
+    Asm->EmitInt8(0);
+    Asm->EmitInt8(0);
+    Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("info_end", TheCU->getID()));
+  }
 }
 
 /// emitAbbreviations - Emit the abbreviation section.
@@ -2967,8 +3195,6 @@ void DwarfDebug::emitDebugLines() {
       MCSymbol *Label = LineInfo.getLabel();
       if (!Label->isDefined()) continue; // Not emitted, in dead code.
 
-      if (LineInfo.getLine() == 0) continue;
-
       if (Asm->isVerbose()) {
         std::pair<unsigned, unsigned> SrcID =
           getSourceDirectoryAndFileIds(LineInfo.getSourceID());
@@ -3128,91 +3354,99 @@ emitFunctionDebugFrame(const FunctionDebugFrameInfo &DebugFrameInfo) {
 /// emitDebugPubNames - Emit visible names into a debug pubnames section.
 ///
 void DwarfDebug::emitDebugPubNames() {
-  // Start the dwarf pubnames section.
-  Asm->OutStreamer.SwitchSection(
-                          Asm->getObjFileLowering().getDwarfPubNamesSection());
-
-  Asm->OutStreamer.AddComment("Length of Public Names Info");
-  Asm->EmitLabelDifference(
-                 Asm->GetTempSymbol("pubnames_end", ModuleCU->getID()),
-                 Asm->GetTempSymbol("pubnames_begin", ModuleCU->getID()), 4);
-
-  Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubnames_begin",
-                                                ModuleCU->getID()));
-
-  Asm->OutStreamer.AddComment("DWARF Version");
-  Asm->EmitInt16(dwarf::DWARF_VERSION); 
-
-  Asm->OutStreamer.AddComment("Offset of Compilation Unit Info");
-  Asm->EmitSectionOffset(Asm->GetTempSymbol("info_begin", ModuleCU->getID()), 
-                         DwarfInfoSectionSym);
-
-  Asm->OutStreamer.AddComment("Compilation Unit Length");
-  Asm->EmitLabelDifference(Asm->GetTempSymbol("info_end", ModuleCU->getID()),
-                           Asm->GetTempSymbol("info_begin", ModuleCU->getID()),
-                           4);
-
-  const StringMap<DIE*> &Globals = ModuleCU->getGlobals();
-  for (StringMap<DIE*>::const_iterator
-         GI = Globals.begin(), GE = Globals.end(); GI != GE; ++GI) {
-    const char *Name = GI->getKeyData();
-    DIE *Entity = GI->second;
-
-    Asm->OutStreamer.AddComment("DIE offset");
-    Asm->EmitInt32(Entity->getOffset());
+  for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
+         E = CUMap.end(); I != E; ++I) {
+    CompileUnit *TheCU = I->second;
+    // Start the dwarf pubnames section.
+    Asm->OutStreamer.SwitchSection(
+      Asm->getObjFileLowering().getDwarfPubNamesSection());
     
-    if (Asm->isVerbose())
-      Asm->OutStreamer.AddComment("External Name");
-    Asm->OutStreamer.EmitBytes(StringRef(Name, strlen(Name)+1), 0);
+    Asm->OutStreamer.AddComment("Length of Public Names Info");
+    Asm->EmitLabelDifference(
+      Asm->GetTempSymbol("pubnames_end", TheCU->getID()),
+      Asm->GetTempSymbol("pubnames_begin", TheCU->getID()), 4);
+    
+    Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubnames_begin",
+                                                  TheCU->getID()));
+    
+    Asm->OutStreamer.AddComment("DWARF Version");
+    Asm->EmitInt16(dwarf::DWARF_VERSION); 
+    
+    Asm->OutStreamer.AddComment("Offset of Compilation Unit Info");
+    Asm->EmitSectionOffset(Asm->GetTempSymbol("info_begin", TheCU->getID()), 
+                           DwarfInfoSectionSym);
+    
+    Asm->OutStreamer.AddComment("Compilation Unit Length");
+    Asm->EmitLabelDifference(Asm->GetTempSymbol("info_end", TheCU->getID()),
+                             Asm->GetTempSymbol("info_begin", TheCU->getID()),
+                             4);
+    
+    const StringMap<DIE*> &Globals = TheCU->getGlobals();
+    for (StringMap<DIE*>::const_iterator
+           GI = Globals.begin(), GE = Globals.end(); GI != GE; ++GI) {
+      const char *Name = GI->getKeyData();
+      DIE *Entity = GI->second;
+      
+      Asm->OutStreamer.AddComment("DIE offset");
+      Asm->EmitInt32(Entity->getOffset());
+      
+      if (Asm->isVerbose())
+        Asm->OutStreamer.AddComment("External Name");
+      Asm->OutStreamer.EmitBytes(StringRef(Name, strlen(Name)+1), 0);
+    }
+    
+    Asm->OutStreamer.AddComment("End Mark");
+    Asm->EmitInt32(0);
+    Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubnames_end",
+                                                TheCU->getID()));
   }
-
-  Asm->OutStreamer.AddComment("End Mark");
-  Asm->EmitInt32(0);
-  Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubnames_end",
-                                                ModuleCU->getID()));
 }
 
 void DwarfDebug::emitDebugPubTypes() {
-  // Start the dwarf pubnames section.
-  Asm->OutStreamer.SwitchSection(
-                          Asm->getObjFileLowering().getDwarfPubTypesSection());
-  Asm->OutStreamer.AddComment("Length of Public Types Info");
-  Asm->EmitLabelDifference(
-                    Asm->GetTempSymbol("pubtypes_end", ModuleCU->getID()),
-                    Asm->GetTempSymbol("pubtypes_begin", ModuleCU->getID()), 4);
-
-  Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubtypes_begin",
-                                                ModuleCU->getID()));
-
-  if (Asm->isVerbose()) Asm->OutStreamer.AddComment("DWARF Version");
-  Asm->EmitInt16(dwarf::DWARF_VERSION);
-
-  Asm->OutStreamer.AddComment("Offset of Compilation ModuleCU Info");
-  Asm->EmitSectionOffset(Asm->GetTempSymbol("info_begin", ModuleCU->getID()),
-                         DwarfInfoSectionSym);
-
-  Asm->OutStreamer.AddComment("Compilation ModuleCU Length");
-  Asm->EmitLabelDifference(Asm->GetTempSymbol("info_end", ModuleCU->getID()),
-                           Asm->GetTempSymbol("info_begin", ModuleCU->getID()),
-                           4);
-
-  const StringMap<DIE*> &Globals = ModuleCU->getGlobalTypes();
-  for (StringMap<DIE*>::const_iterator
-         GI = Globals.begin(), GE = Globals.end(); GI != GE; ++GI) {
-    const char *Name = GI->getKeyData();
-    DIE * Entity = GI->second;
-
-    if (Asm->isVerbose()) Asm->OutStreamer.AddComment("DIE offset");
-    Asm->EmitInt32(Entity->getOffset());
+  for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
+         E = CUMap.end(); I != E; ++I) {
+    CompileUnit *TheCU = I->second;
+    // Start the dwarf pubnames section.
+    Asm->OutStreamer.SwitchSection(
+      Asm->getObjFileLowering().getDwarfPubTypesSection());
+    Asm->OutStreamer.AddComment("Length of Public Types Info");
+    Asm->EmitLabelDifference(
+      Asm->GetTempSymbol("pubtypes_end", TheCU->getID()),
+      Asm->GetTempSymbol("pubtypes_begin", TheCU->getID()), 4);
+    
+    Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubtypes_begin",
+                                                  TheCU->getID()));
     
-    if (Asm->isVerbose()) Asm->OutStreamer.AddComment("External Name");
-    Asm->OutStreamer.EmitBytes(StringRef(Name, GI->getKeyLength()+1), 0);
+    if (Asm->isVerbose()) Asm->OutStreamer.AddComment("DWARF Version");
+    Asm->EmitInt16(dwarf::DWARF_VERSION);
+    
+    Asm->OutStreamer.AddComment("Offset of Compilation Unit Info");
+    Asm->EmitSectionOffset(Asm->GetTempSymbol("info_begin", TheCU->getID()),
+                           DwarfInfoSectionSym);
+    
+    Asm->OutStreamer.AddComment("Compilation Unit Length");
+    Asm->EmitLabelDifference(Asm->GetTempSymbol("info_end", TheCU->getID()),
+                             Asm->GetTempSymbol("info_begin", TheCU->getID()),
+                             4);
+    
+    const StringMap<DIE*> &Globals = TheCU->getGlobalTypes();
+    for (StringMap<DIE*>::const_iterator
+           GI = Globals.begin(), GE = Globals.end(); GI != GE; ++GI) {
+      const char *Name = GI->getKeyData();
+      DIE * Entity = GI->second;
+      
+      if (Asm->isVerbose()) Asm->OutStreamer.AddComment("DIE offset");
+      Asm->EmitInt32(Entity->getOffset());
+      
+      if (Asm->isVerbose()) Asm->OutStreamer.AddComment("External Name");
+      Asm->OutStreamer.EmitBytes(StringRef(Name, GI->getKeyLength()+1), 0);
+    }
+    
+    Asm->OutStreamer.AddComment("End Mark");
+    Asm->EmitInt32(0); 
+    Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubtypes_end",
+                                                  TheCU->getID()));
   }
-
-  Asm->OutStreamer.AddComment("End Mark");
-  Asm->EmitInt32(0); 
-  Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubtypes_end",
-                                                ModuleCU->getID()));
 }
 
 /// emitDebugStr - Emit visible names into a debug str section.
@@ -3248,9 +3482,39 @@ void DwarfDebug::emitDebugStr() {
 /// emitDebugLoc - Emit visible names into a debug loc section.
 ///
 void DwarfDebug::emitDebugLoc() {
+  if (DotDebugLocEntries.empty())
+    return;
+
   // Start the dwarf loc section.
   Asm->OutStreamer.SwitchSection(
-                              Asm->getObjFileLowering().getDwarfLocSection());
+    Asm->getObjFileLowering().getDwarfLocSection());
+  unsigned char Size = Asm->getTargetData().getPointerSize();
+  Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_loc", 0));
+  unsigned index = 1;
+  for (SmallVector<DotDebugLocEntry, 4>::iterator I = DotDebugLocEntries.begin(),
+         E = DotDebugLocEntries.end(); I != E; ++I, ++index) {
+    DotDebugLocEntry Entry = *I;
+    if (Entry.isEmpty()) {
+      Asm->OutStreamer.EmitIntValue(0, Size, /*addrspace*/0);
+      Asm->OutStreamer.EmitIntValue(0, Size, /*addrspace*/0);
+      Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_loc", index));
+    } else {
+      Asm->OutStreamer.EmitSymbolValue(Entry.Begin, Size, 0);
+      Asm->OutStreamer.EmitSymbolValue(Entry.End, Size, 0);
+      const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
+      unsigned Reg = RI->getDwarfRegNum(Entry.Loc.getReg(), false);
+      if (Reg < 32) {
+        Asm->OutStreamer.AddComment("Loc expr size");
+        Asm->EmitInt16(1);
+        Asm->EmitInt8(dwarf::DW_OP_reg0 + Reg);
+      } else {
+        Asm->OutStreamer.AddComment("Loc expr size");
+        Asm->EmitInt16(1+MCAsmInfo::getULEB128Size(Reg));
+        Asm->EmitInt8(dwarf::DW_OP_regx);
+        Asm->EmitULEB128(Reg);
+      }
+    }
+  }
 }
 
 /// EmitDebugARanges - Emit visible names into a debug aranges section.
@@ -3310,7 +3574,7 @@ void DwarfDebug::emitDebugInlineInfo() {
   if (!Asm->MAI->doesDwarfUsesInlineInfoSection())
     return;
 
-  if (!ModuleCU)
+  if (!FirstCU)
     return;
 
   Asm->OutStreamer.SwitchSection(
@@ -3327,11 +3591,11 @@ void DwarfDebug::emitDebugInlineInfo() {
   Asm->OutStreamer.AddComment("Address Size (in bytes)");
   Asm->EmitInt8(Asm->getTargetData().getPointerSize());
 
-  for (SmallVector<MDNode *, 4>::iterator I = InlinedSPNodes.begin(),
+  for (SmallVector<const MDNode *, 4>::iterator I = InlinedSPNodes.begin(),
          E = InlinedSPNodes.end(); I != E; ++I) {
 
-    MDNode *Node = *I;
-    DenseMap<MDNode *, SmallVector<InlineInfoLabels, 4> >::iterator II
+    const MDNode *Node = *I;
+    DenseMap<const MDNode *, SmallVector<InlineInfoLabels, 4> >::iterator II
       = InlineInfo.find(Node);
     SmallVector<InlineInfoLabels, 4> &Labels = II->second;
     DISubprogram SP(Node);
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index b964b23..0d6116f 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -15,6 +15,7 @@
 #define CODEGEN_ASMPRINTER_DWARFDEBUG_H__
 
 #include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineLocation.h"
 #include "DIE.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/FoldingSet.h"
@@ -30,7 +31,6 @@ class DbgConcreteScope;
 class DbgScope;
 class DbgVariable;
 class MachineFrameInfo;
-class MachineLocation;
 class MachineModuleInfo;
 class MachineOperand;
 class MCAsmInfo;
@@ -82,8 +82,8 @@ class DwarfDebug {
   // Attributes used to construct specific Dwarf sections.
   //
 
-  /// ModuleCU - All DIEs are inserted in ModuleCU.
-  CompileUnit *ModuleCU;
+  CompileUnit *FirstCU;
+  DenseMap <const MDNode *, CompileUnit *> CUMap;
 
   /// AbbreviationsSet - Used to uniquely define abbreviations.
   ///
@@ -146,15 +146,15 @@ class DwarfDebug {
   /// DbgScopeMap - Tracks the scopes in the current function.  Owns the
   /// contained DbgScope*s.
   ///
-  DenseMap<MDNode *, DbgScope *> DbgScopeMap;
+  DenseMap<const MDNode *, DbgScope *> DbgScopeMap;
 
   /// ConcreteScopes - Tracks the concrete scopees in the current function.
   /// These scopes are also included in DbgScopeMap.
-  DenseMap<MDNode *, DbgScope *> ConcreteScopes;
+  DenseMap<const MDNode *, DbgScope *> ConcreteScopes;
 
   /// AbstractScopes - Tracks the abstract scopes a module. These scopes are
   /// not included DbgScopeMap.  AbstractScopes owns its DbgScope*s.
-  DenseMap<MDNode *, DbgScope *> AbstractScopes;
+  DenseMap<const MDNode *, DbgScope *> AbstractScopes;
 
   /// AbstractScopesList - Tracks abstract scopes constructed while processing
   /// a function. This list is cleared during endFunction().
@@ -162,13 +162,43 @@ class DwarfDebug {
 
   /// AbstractVariables - Collection on abstract variables.  Owned by the
   /// DbgScopes in AbstractScopes.
-  DenseMap<MDNode *, DbgVariable *> AbstractVariables;
-
-  /// DbgValueStartMap - Tracks starting scope of variable DIEs.
-  /// If the scope of an object begins sometime after the low pc value for the 
-  /// scope most closely enclosing the object, the object entry may have a 
-  /// DW_AT_start_scope attribute.
-  DenseMap<const MachineInstr *, DbgVariable *> DbgValueStartMap;
+  DenseMap<const MDNode *, DbgVariable *> AbstractVariables;
+
+  /// DbgVariableToFrameIndexMap - Tracks frame index used to find 
+  /// variable's value.
+  DenseMap<const DbgVariable *, int> DbgVariableToFrameIndexMap;
+
+  /// DbgVariableToDbgInstMap - Maps DbgVariable to corresponding DBG_VALUE
+  /// machine instruction.
+  DenseMap<const DbgVariable *, const MachineInstr *> DbgVariableToDbgInstMap;
+
+  /// DbgVariableLabelsMap - Maps DbgVariable to corresponding MCSymbol.
+  DenseMap<const DbgVariable *, const MCSymbol *> DbgVariableLabelsMap;
+
+  /// DotDebugLocEntry - This struct describes location entries emitted in
+  /// .debug_loc section.
+  typedef struct DotDebugLocEntry {
+    const MCSymbol *Begin;
+    const MCSymbol *End;
+    MachineLocation Loc;
+    DotDebugLocEntry() : Begin(0), End(0) {}
+    DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, 
+                  MachineLocation &L) : Begin(B), End(E), Loc(L) {}
+    /// Empty entries are also used as a trigger to emit temp label. Such
+    /// labels are referenced is used to find debug_loc offset for a given DIE.
+    bool isEmpty() { return Begin == 0 && End == 0; }
+  } DotDebugLocEntry;
+
+  /// DotDebugLocEntries - Collection of DotDebugLocEntry.
+  SmallVector<DotDebugLocEntry, 4> DotDebugLocEntries;
+
+  /// UseDotDebugLocEntry - DW_AT_location attributes for the DIEs in this set
+  /// idetifies corresponding .debug_loc entry offset.
+  SmallPtrSet<const DIE *, 4> UseDotDebugLocEntry;
+
+  /// VarToAbstractVarMap - Maps DbgVariable with corresponding Abstract
+  /// DbgVariable, if any.
+  DenseMap<const DbgVariable *, const DbgVariable *> VarToAbstractVarMap;
 
   /// InliendSubprogramDIEs - Collection of subprgram DIEs that are marked
   /// (at the end of the module) as DW_AT_inline.
@@ -177,7 +207,7 @@ class DwarfDebug {
   /// ContainingTypeMap - This map is used to keep track of subprogram DIEs that
   /// need DW_AT_containing_type attribute. This attribute points to a DIE that
   /// corresponds to the MDNode mapped with the subprogram DIE.
-  DenseMap<DIE *, MDNode *> ContainingTypeMap;
+  DenseMap<DIE *, const MDNode *> ContainingTypeMap;
 
   typedef SmallVector<DbgScope *, 2> ScopeVector;
   SmallPtrSet<const MachineInstr *, 8> InsnsBeginScopeSet;
@@ -185,9 +215,9 @@ class DwarfDebug {
 
   /// InlineInfo - Keep track of inlined functions and their location.  This
   /// information is used to populate debug_inlined section.
-  typedef std::pair<MCSymbol*, DIE *> InlineInfoLabels;
-  DenseMap<MDNode*, SmallVector<InlineInfoLabels, 4> > InlineInfo;
-  SmallVector<MDNode *, 4> InlinedSPNodes;
+  typedef std::pair<const MCSymbol *, DIE *> InlineInfoLabels;
+  DenseMap<const MDNode *, SmallVector<InlineInfoLabels, 4> > InlineInfo;
+  SmallVector<const MDNode *, 4> InlinedSPNodes;
 
   /// LabelsBeforeInsn - Maps instruction with label emitted before 
   /// instruction.
@@ -197,6 +227,13 @@ class DwarfDebug {
   /// instruction.
   DenseMap<const MachineInstr *, MCSymbol *> LabelsAfterInsn;
 
+  /// insnNeedsLabel - Collection of instructions that need a label to mark
+  /// a debuggging information entity.
+  SmallPtrSet<const MachineInstr *, 8> InsnNeedsLabel;
+
+  /// ProcessedArgs - Collection of arguments already processed.
+  SmallPtrSet<const MDNode *, 8> ProcessedArgs;
+
   SmallVector<const MCSymbol *, 8> DebugRangeSymbols;
 
   /// Previous instruction's location information. This is used to determine
@@ -219,8 +256,8 @@ class DwarfDebug {
   // section offsets and are created by EmitSectionLabels.
   MCSymbol *DwarfFrameSectionSym, *DwarfInfoSectionSym, *DwarfAbbrevSectionSym;
   MCSymbol *DwarfStrSectionSym, *TextSectionSym, *DwarfDebugRangeSectionSym;
-
-  MCSymbol *FunctionBeginSym;
+  MCSymbol *DwarfDebugLocSectionSym;
+  MCSymbol *FunctionBeginSym, *FunctionEndSym;
 private:
   
   /// getSourceDirectoryAndFileIds - Return the directory and file ids that
@@ -295,7 +332,7 @@ private:
   /// addSourceLine - Add location information to specified debug information
   /// entry.
   void addSourceLine(DIE *Die, const DIVariable *V);
-  void addSourceLine(DIE *Die, const DIGlobal *G);
+  void addSourceLine(DIE *Die, const DIGlobalVariable *G);
   void addSourceLine(DIE *Die, const DISubprogram *SP);
   void addSourceLine(DIE *Die, const DIType *Ty);
   void addSourceLine(DIE *Die, const DINameSpace *NS);
@@ -306,13 +343,13 @@ private:
                   const MachineLocation &Location);
 
   /// addRegisterAddress - Add register location entry in variable DIE.
-  bool addRegisterAddress(DIE *Die, DbgVariable *DV, const MachineOperand &MO);
+  bool addRegisterAddress(DIE *Die, const MCSymbol *VS, const MachineOperand &MO);
 
   /// addConstantValue - Add constant value entry in variable DIE.
-  bool addConstantValue(DIE *Die, DbgVariable *DV, const MachineOperand &MO);
+  bool addConstantValue(DIE *Die, const MCSymbol *VS, const MachineOperand &MO);
 
   /// addConstantFPValue - Add constant value entry in variable DIE.
-  bool addConstantFPValue(DIE *Die, DbgVariable *DV, const MachineOperand &MO);
+  bool addConstantFPValue(DIE *Die, const MCSymbol *VS, const MachineOperand &MO);
 
   /// addComplexAddress - Start with the address based on the location provided,
   /// and generate the DWARF information necessary to find the actual variable
@@ -380,21 +417,18 @@ private:
   DIE *createSubprogramDIE(const DISubprogram &SP, bool MakeDecl = false);
 
   /// getOrCreateDbgScope - Create DbgScope for the scope.
-  DbgScope *getOrCreateDbgScope(MDNode *Scope, MDNode *InlinedAt);
+  DbgScope *getOrCreateDbgScope(const MDNode *Scope, const MDNode *InlinedAt);
 
-  DbgScope *getOrCreateAbstractScope(MDNode *N);
+  DbgScope *getOrCreateAbstractScope(const MDNode *N);
 
   /// findAbstractVariable - Find abstract variable associated with Var.
-  DbgVariable *findAbstractVariable(DIVariable &Var, unsigned FrameIdx, 
-                                    DebugLoc Loc);
-  DbgVariable *findAbstractVariable(DIVariable &Var, const MachineInstr *MI,
-                                    DebugLoc Loc);
+  DbgVariable *findAbstractVariable(DIVariable &Var, DebugLoc Loc);
 
   /// updateSubprogramScopeDIE - Find DIE for the given subprogram and 
   /// attach appropriate DW_AT_low_pc and DW_AT_high_pc attributes.
   /// If there are global variables in this scope then create and insert
   /// DIEs for these variables.
-  DIE *updateSubprogramScopeDIE(MDNode *SPNode);
+  DIE *updateSubprogramScopeDIE(const MDNode *SPNode);
 
   /// constructLexicalScope - Construct new DW_TAG_lexical_block 
   /// for this scope and attach DW_AT_low_pc/DW_AT_high_pc labels.
@@ -506,11 +540,18 @@ private:
   /// maps as well.
   unsigned GetOrCreateSourceID(StringRef DirName, StringRef FileName);
 
-  void constructCompileUnit(MDNode *N);
+  /// constructCompileUnit - Create new CompileUnit for the given 
+  /// metadata node with tag DW_TAG_compile_unit.
+  void constructCompileUnit(const MDNode *N);
+
+  /// getCompielUnit - Get CompileUnit DIE.
+  CompileUnit *getCompileUnit(const MDNode *N) const;
 
-  void constructGlobalVariableDIE(MDNode *N);
+  /// constructGlobalVariableDIE - Construct global variable DIE.
+  void constructGlobalVariableDIE(const MDNode *N);
 
-  void constructSubprogramDIE(MDNode *N);
+  /// construct SubprogramDIE - Construct subprogram DIE.
+  void constructSubprogramDIE(const MDNode *N);
 
   // FIXME: This should go away in favor of complex addresses.
   /// Find the type the programmer originally declared the variable to be
@@ -521,7 +562,7 @@ private:
   /// recordSourceLine - Register a source line with debug info. Returns the
   /// unique label that was emitted and which provides correspondence to
   /// the source line list.
-  MCSymbol *recordSourceLine(unsigned Line, unsigned Col, MDNode *Scope);
+  MCSymbol *recordSourceLine(unsigned Line, unsigned Col, const MDNode *Scope);
   
   /// getSourceLineCount - Return the number of source lines in the debug
   /// info.
@@ -529,6 +570,20 @@ private:
     return Lines.size();
   }
   
+  /// recordVariableFrameIndex - Record a variable's index.
+  void recordVariableFrameIndex(const DbgVariable *V, int Index);
+
+  /// findVariableFrameIndex - Return true if frame index for the variable
+  /// is found. Update FI to hold value of the index.
+  bool findVariableFrameIndex(const DbgVariable *V, int *FI);
+
+  /// findVariableLabel - Find MCSymbol for the variable.
+  const MCSymbol *findVariableLabel(const DbgVariable *V);
+
+  /// findDbgScope - Find DbgScope for the debug loc attached with an 
+  /// instruction.
+  DbgScope *findDbgScope(const MachineInstr *MI);
+
   /// identifyScopeMarkers() - Indentify instructions that are marking
   /// beginning of or end of a scope.
   void identifyScopeMarkers();
@@ -538,8 +593,12 @@ private:
   bool extractScopeInformation();
   
   /// collectVariableInfo - Populate DbgScope entries with variables' info.
-  void collectVariableInfo();
+  void collectVariableInfo(const MachineFunction *);
   
+  /// collectVariableInfoFromMMITable - Collect variable information from
+  /// side table maintained by MMI.
+  void collectVariableInfoFromMMITable(const MachineFunction * MF,
+                                       SmallPtrSet<const MDNode *, 16> &P);
 public:
   //===--------------------------------------------------------------------===//
   // Main entry points.
@@ -563,6 +622,12 @@ public:
   ///
   void endFunction(const MachineFunction *MF);
 
+  /// getLabelBeforeInsn - Return Label preceding the instruction.
+  const MCSymbol *getLabelBeforeInsn(const MachineInstr *MI);
+
+  /// getLabelAfterInsn - Return Label immediately following the instruction.
+  const MCSymbol *getLabelAfterInsn(const MachineInstr *MI);
+
   /// beginScope - Process beginning of a scope.
   void beginScope(const MachineInstr *MI);
 
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.cpp b/lib/CodeGen/AsmPrinter/DwarfException.cpp
index 0ff1036..c872840 100644
--- a/lib/CodeGen/AsmPrinter/DwarfException.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfException.cpp
@@ -189,7 +189,7 @@ void DwarfException::EmitFDE(const FunctionEHFrameInfo &EHFrameInfo) {
   // EH Frame, but some environments do not handle weak absolute symbols. If
   // UnwindTablesMandatory is set we cannot do this optimization; the unwind
   // info is to be available for non-EH uses.
-  if (!EHFrameInfo.hasCalls && !UnwindTablesMandatory &&
+  if (!EHFrameInfo.adjustsStack && !UnwindTablesMandatory &&
       (!TheFunc->isWeakForLinker() ||
        !Asm->MAI->getWeakDefDirective() ||
        TLOF.getSupportsWeakOmittedEHFrame())) {
@@ -949,11 +949,12 @@ void DwarfException::EndFunction() {
                                       TLOF.isFunctionEHFrameSymbolPrivate());
   
   // Save EH frame information
-  EHFrames.push_back(FunctionEHFrameInfo(FunctionEHSym,
-                                         Asm->getFunctionNumber(),
-                                         MMI->getPersonalityIndex(),
-                                         Asm->MF->getFrameInfo()->hasCalls(),
-                                         !MMI->getLandingPads().empty(),
-                                         MMI->getFrameMoves(),
-                                         Asm->MF->getFunction()));
+  EHFrames.
+    push_back(FunctionEHFrameInfo(FunctionEHSym,
+                                  Asm->getFunctionNumber(),
+                                  MMI->getPersonalityIndex(),
+                                  Asm->MF->getFrameInfo()->adjustsStack(),
+                                  !MMI->getLandingPads().empty(),
+                                  MMI->getFrameMoves(),
+                                  Asm->MF->getFunction()));
 }
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.h b/lib/CodeGen/AsmPrinter/DwarfException.h
index 5839f8c..bc311e6 100644
--- a/lib/CodeGen/AsmPrinter/DwarfException.h
+++ b/lib/CodeGen/AsmPrinter/DwarfException.h
@@ -45,7 +45,7 @@ class DwarfException {
     MCSymbol *FunctionEHSym;  // L_foo.eh
     unsigned Number;
     unsigned PersonalityIndex;
-    bool hasCalls;
+    bool adjustsStack;
     bool hasLandingPads;
     std::vector<MachineMove> Moves;
     const Function *function;
@@ -55,7 +55,7 @@ class DwarfException {
                         const std::vector<MachineMove> &M,
                         const Function *f):
       FunctionEHSym(EHSym), Number(Num), PersonalityIndex(P),
-      hasCalls(hC), hasLandingPads(hL), Moves(M), function (f) { }
+      adjustsStack(hC), hasLandingPads(hL), Moves(M), function (f) { }
   };
 
   std::vector<FunctionEHFrameInfo> EHFrames;
diff --git a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
index a8c3c7b..f92127f 100644
--- a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
@@ -104,6 +104,21 @@ void OcamlGCMetadataPrinter::finishAssembly(AsmPrinter &AP) {
   AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getDataSection());
   EmitCamlGlobal(getModule(), AP, "frametable");
 
+  int NumDescriptors = 0;
+  for (iterator I = begin(), IE = end(); I != IE; ++I) {
+    GCFunctionInfo &FI = **I;
+    for (GCFunctionInfo::iterator J = FI.begin(), JE = FI.end(); J != JE; ++J) {
+      NumDescriptors++;
+    }
+  }
+
+  if (NumDescriptors >= 1<<16) {
+    // Very rude!
+    report_fatal_error(" Too much descriptor for ocaml GC");
+  }
+  AP.EmitInt16(NumDescriptors);
+  AP.EmitAlignment(IntPtrSize == 4 ? 2 : 3);
+
   for (iterator I = begin(), IE = end(); I != IE; ++I) {
     GCFunctionInfo &FI = **I;
 
@@ -135,11 +150,13 @@ void OcamlGCMetadataPrinter::finishAssembly(AsmPrinter &AP) {
 
       for (GCFunctionInfo::live_iterator K = FI.live_begin(J),
                                          KE = FI.live_end(J); K != KE; ++K) {
-        assert(K->StackOffset < 1<<16 &&
-               "GC root stack offset is outside of fixed stack frame and out "
-               "of range for ocaml GC!");
-
-        AP.EmitInt32(K->StackOffset);
+        if (K->StackOffset >= 1<<16) {
+          // Very rude!
+          report_fatal_error(
+                 "GC root stack offset is outside of fixed stack frame and out "
+                 "of range for ocaml GC!");
+        }
+        AP.EmitInt16(K->StackOffset);
       }
 
       AP.EmitAlignment(IntPtrSize == 4 ? 2 : 3);
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp
index 759fbaa..fd957b1 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -26,7 +26,7 @@
 using namespace llvm;
 
 CriticalAntiDepBreaker::
-CriticalAntiDepBreaker(MachineFunction& MFi) : 
+CriticalAntiDepBreaker(MachineFunction& MFi) :
   AntiDepBreaker(), MF(MFi),
   MRI(MF.getRegInfo()),
   TRI(MF.getTarget().getRegisterInfo()),
@@ -172,7 +172,7 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) {
     unsigned Reg = MO.getReg();
     if (Reg == 0) continue;
     const TargetRegisterClass *NewRC = 0;
-    
+
     if (i < MI->getDesc().getNumOperands())
       NewRC = MI->getDesc().OpInfo[i].getRegClass(TRI);
 
@@ -422,7 +422,7 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits,
     // breaking anti-dependence edges that aren't going to significantly
     // impact the overall schedule. There are a limited number of registers
     // and we want to save them for the important edges.
-    // 
+    //
     // TODO: Instructions with multiple defs could have multiple
     // anti-dependencies. The current code here only knows how to break one
     // edge per instruction. Note that we'd have to be able to break all of
diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp
index e1c52f7..63bb5f2 100644
--- a/lib/CodeGen/IntrinsicLowering.cpp
+++ b/lib/CodeGen/IntrinsicLowering.cpp
@@ -83,6 +83,12 @@ static CallInst *ReplaceCallWith(const char *NewFn, CallInst *CI,
   return NewCI;
 }
 
+// VisualStudio defines setjmp as _setjmp
+#if defined(_MSC_VER) && defined(setjmp)
+#define setjmp_undefined_for_visual_studio
+#undef setjmp
+#endif
+
 void IntrinsicLowering::AddPrototypes(Module &M) {
   LLVMContext &Context = M.getContext();
   for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp
index 331dc7d..b584704 100644
--- a/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/lib/CodeGen/LLVMTargetMachine.cpp
@@ -65,6 +65,12 @@ static cl::opt<bool> PrintISelInput("print-isel-input", cl::Hidden,
     cl::desc("Print LLVM IR input to isel pass"));
 static cl::opt<bool> PrintGCInfo("print-gc", cl::Hidden,
     cl::desc("Dump garbage collector data"));
+static cl::opt<bool> ShowMCEncoding("show-mc-encoding", cl::Hidden,
+    cl::desc("Show encoding in .s output"));
+static cl::opt<bool> ShowMCInst("show-mc-inst", cl::Hidden,
+    cl::desc("Show instruction structure in .s output"));
+static cl::opt<bool> EnableMCLogging("enable-mc-api-logging", cl::Hidden,
+    cl::desc("Enable MC API logging"));
 static cl::opt<bool> VerifyMachineCode("verify-machineinstrs", cl::Hidden,
     cl::desc("Verify generated machine code"),
     cl::init(getenv("LLVM_VERIFY_MACHINEINSTRS")!=NULL));
@@ -131,21 +137,33 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
   case CGFT_AssemblyFile: {
     MCInstPrinter *InstPrinter =
       getTarget().createMCInstPrinter(MAI.getAssemblerDialect(), MAI);
+
+    // Create a code emitter if asked to show the encoding.
+    //
+    // FIXME: These are currently leaked.
+    MCCodeEmitter *MCE = 0;
+    if (ShowMCEncoding)
+      MCE = getTarget().createCodeEmitter(*this, *Context);
+
     AsmStreamer.reset(createAsmStreamer(*Context, Out,
                                         getTargetData()->isLittleEndian(),
                                         getVerboseAsm(), InstPrinter,
-                                        /*codeemitter*/0));
+                                        MCE, ShowMCInst));
     break;
   }
   case CGFT_ObjectFile: {
     // Create the code emitter for the target if it exists.  If not, .o file
     // emission fails.
+    //
+    // FIXME: These are currently leaked.
     MCCodeEmitter *MCE = getTarget().createCodeEmitter(*this, *Context);
     TargetAsmBackend *TAB = getTarget().createAsmBackend(TargetTriple);
     if (MCE == 0 || TAB == 0)
       return true;
-    
-    AsmStreamer.reset(createMachOStreamer(*Context, *TAB, Out, MCE));
+
+    AsmStreamer.reset(getTarget().createObjectStreamer(TargetTriple, *Context,
+                                                       *TAB, Out, MCE,
+                                                       hasMCRelaxAll()));
     break;
   }
   case CGFT_Null:
@@ -154,7 +172,10 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
     AsmStreamer.reset(createNullStreamer(*Context));
     break;
   }
-  
+
+  if (EnableMCLogging)
+    AsmStreamer.reset(createLoggingStreamer(AsmStreamer.take(), errs()));
+
   // Create the AsmPrinter, which takes ownership of AsmStreamer if successful.
   FunctionPass *Printer = getTarget().createAsmPrinter(*this, *AsmStreamer);
   if (Printer == 0)
diff --git a/lib/CodeGen/LatencyPriorityQueue.cpp b/lib/CodeGen/LatencyPriorityQueue.cpp
index f1bd573..03b4eab 100644
--- a/lib/CodeGen/LatencyPriorityQueue.cpp
+++ b/lib/CodeGen/LatencyPriorityQueue.cpp
@@ -68,7 +68,7 @@ SUnit *LatencyPriorityQueue::getSingleUnscheduledPred(SUnit *SU) {
   return OnlyAvailablePred;
 }
 
-void LatencyPriorityQueue::push_impl(SUnit *SU) {
+void LatencyPriorityQueue::push(SUnit *SU) {
   // Look at all of the successors of this node.  Count the number of nodes that
   // this node is the sole unscheduled node for.
   unsigned NumNodesBlocking = 0;
@@ -79,7 +79,7 @@ void LatencyPriorityQueue::push_impl(SUnit *SU) {
   }
   NumNodesSolelyBlocking[SU->NodeNum] = NumNodesBlocking;
   
-  Queue.push(SU);
+  Queue.push_back(SU);
 }
 
 
@@ -114,3 +114,25 @@ void LatencyPriorityQueue::AdjustPriorityOfUnscheduledPreds(SUnit *SU) {
   // NumNodesSolelyBlocking value.
   push(OnlyAvailablePred);
 }
+
+SUnit *LatencyPriorityQueue::pop() {
+  if (empty()) return NULL;
+  std::vector<SUnit *>::iterator Best = Queue.begin();
+  for (std::vector<SUnit *>::iterator I = next(Queue.begin()),
+       E = Queue.end(); I != E; ++I)
+    if (Picker(*Best, *I))
+      Best = I;
+  SUnit *V = *Best;
+  if (Best != prior(Queue.end()))
+    std::swap(*Best, Queue.back());
+  Queue.pop_back();
+  return V;
+}
+
+void LatencyPriorityQueue::remove(SUnit *SU) {
+  assert(!Queue.empty() && "Queue is empty!");
+  std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(), SU);
+  if (I != prior(Queue.end()))
+    std::swap(*I, Queue.back());
+  Queue.pop_back();
+}
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp
index ca9921c..a6d38ad 100644
--- a/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -263,7 +263,7 @@ static void printRegName(unsigned reg, const TargetRegisterInfo* tri_) {
 #endif
 
 static
-bool MultipleDefsByMI(const MachineInstr &MI, unsigned MOIdx) {
+bool MultipleDefsBySameMI(const MachineInstr &MI, unsigned MOIdx) {
   unsigned Reg = MI.getOperand(MOIdx).getReg();
   for (unsigned i = MOIdx+1, e = MI.getNumOperands(); i < e; ++i) {
     const MachineOperand &MO = MI.getOperand(i);
@@ -279,6 +279,24 @@ bool MultipleDefsByMI(const MachineInstr &MI, unsigned MOIdx) {
   return false;
 }
 
+/// isPartialRedef - Return true if the specified def at the specific index is
+/// partially re-defining the specified live interval. A common case of this is
+/// a definition of the sub-register. 
+bool LiveIntervals::isPartialRedef(SlotIndex MIIdx, MachineOperand &MO,
+                                   LiveInterval &interval) {
+  if (!MO.getSubReg() || MO.isEarlyClobber())
+    return false;
+
+  SlotIndex RedefIndex = MIIdx.getDefIndex();
+  const LiveRange *OldLR =
+    interval.getLiveRangeContaining(RedefIndex.getUseIndex());
+  if (OldLR->valno->isDefAccurate()) {
+    MachineInstr *DefMI = getInstructionFromIndex(OldLR->valno->def);
+    return DefMI->findRegisterDefOperandIdx(interval.reg) != -1;
+  }
+  return false;
+}
+
 void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
                                              MachineBasicBlock::iterator mi,
                                              SlotIndex MIIdx,
@@ -302,15 +320,20 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
     // of inputs.
     if (MO.isEarlyClobber())
       defIndex = MIIdx.getUseIndex();
-    VNInfo *ValNo;
+
+    // Make sure the first definition is not a partial redefinition. Add an
+    // <imp-def> of the full register.
+    if (MO.getSubReg())
+      mi->addRegisterDefined(interval.reg);
+
     MachineInstr *CopyMI = NULL;
     unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
     if (mi->isExtractSubreg() || mi->isInsertSubreg() || mi->isSubregToReg() ||
         tii_->isMoveInstr(*mi, SrcReg, DstReg, SrcSubReg, DstSubReg))
       CopyMI = mi;
-    // Earlyclobbers move back one.
-    ValNo = interval.getNextValue(defIndex, CopyMI, true, VNInfoAllocator);
 
+    VNInfo *ValNo = interval.getNextValue(defIndex, CopyMI, true,
+                                          VNInfoAllocator);
     assert(ValNo->id == 0 && "First value in interval is not 0?");
 
     // Loop over all of the blocks that the vreg is defined in.  There are
@@ -389,9 +412,9 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
     }
 
   } else {
-    if (MultipleDefsByMI(*mi, MOIdx))
-      // Mutple defs of the same virtual register by the same instruction. e.g.
-      // %reg1031:5<def>, %reg1031:6<def> = VLD1q16 %reg1024<kill>, ...
+    if (MultipleDefsBySameMI(*mi, MOIdx))
+      // Multiple defs of the same virtual register by the same instruction.
+      // e.g. %reg1031:5<def>, %reg1031:6<def> = VLD1q16 %reg1024<kill>, ...
       // This is likely due to elimination of REG_SEQUENCE instructions. Return
       // here since there is nothing to do.
       return;
@@ -400,13 +423,21 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
     // must be due to phi elimination or two addr elimination.  If this is
     // the result of two address elimination, then the vreg is one of the
     // def-and-use register operand.
-    if (mi->isRegTiedToUseOperand(MOIdx)) {
+
+    // It may also be partial redef like this:
+    // 80	%reg1041:6<def> = VSHRNv4i16 %reg1034<kill>, 12, pred:14, pred:%reg0
+    // 120	%reg1041:5<def> = VSHRNv4i16 %reg1039<kill>, 12, pred:14, pred:%reg0
+    bool PartReDef = isPartialRedef(MIIdx, MO, interval);
+    if (PartReDef || mi->isRegTiedToUseOperand(MOIdx)) {
       // If this is a two-address definition, then we have already processed
       // the live range.  The only problem is that we didn't realize there
       // are actually two values in the live interval.  Because of this we
       // need to take the LiveRegion that defines this register and split it
       // into two values.
-      assert(interval.containsOneValue());
+      // Two-address vregs should always only be redefined once.  This means
+      // that at this point, there should be exactly one value number in it.
+      assert((PartReDef || interval.containsOneValue()) &&
+             "Unexpected 2-addr liveint!");
       SlotIndex DefIndex = interval.getValNumInfo(0)->def.getDefIndex();
       SlotIndex RedefIndex = MIIdx.getDefIndex();
       if (MO.isEarlyClobber())
@@ -420,10 +451,6 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
       // because the 2-addr copy must be in the same MBB as the redef.
       interval.removeRange(DefIndex, RedefIndex);
 
-      // Two-address vregs should always only be redefined once.  This means
-      // that at this point, there should be exactly one value number in it.
-      assert(interval.containsOneValue() && "Unexpected 2-addr liveint!");
-
       // The new value number (#1) is defined by the instruction we claimed
       // defined value #0.
       VNInfo *ValNo = interval.getNextValue(OldValNo->def, OldValNo->getCopy(),
@@ -434,6 +461,12 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
       // Value#0 is now defined by the 2-addr instruction.
       OldValNo->def  = RedefIndex;
       OldValNo->setCopy(0);
+
+      // A re-def may be a copy. e.g. %reg1030:6<def> = VMOVD %reg1026, ...
+      unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
+      if (PartReDef &&
+          tii_->isMoveInstr(*mi, SrcReg, DstReg, SrcSubReg, DstSubReg))
+        OldValNo->setCopy(&*mi);
       
       // Add the new live interval which replaces the range for the input copy.
       LiveRange LR(DefIndex, RedefIndex, ValNo);
@@ -451,8 +484,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
           dbgs() << " RESULT: ";
           interval.print(dbgs(), tri_);
         });
-    } else {
-      assert(lv_->isPHIJoin(interval.reg) && "Multiply defined register");
+    } else if (lv_->isPHIJoin(interval.reg)) {
       // In the case of PHI elimination, each variable definition is only
       // live until the end of the block.  We've already taken care of the
       // rest of the live range.
@@ -475,6 +507,8 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
       ValNo->addKill(indexes_->getTerminatorGap(mbb));
       ValNo->setHasPHIKill(true);
       DEBUG(dbgs() << " phi-join +" << LR);
+    } else {
+      llvm_unreachable("Multiply defined register");
     }
   }
 
@@ -528,7 +562,7 @@ void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB,
       end = baseIndex.getDefIndex();
       goto exit;
     } else {
-      int DefIdx = mi->findRegisterDefOperandIdx(interval.reg, false, tri_);
+      int DefIdx = mi->findRegisterDefOperandIdx(interval.reg,false,false,tri_);
       if (DefIdx != -1) {
         if (mi->isRegTiedToUseOperand(DefIdx)) {
           // Two-address instruction.
@@ -590,7 +624,7 @@ void LiveIntervals::handleRegisterDef(MachineBasicBlock *MBB,
     for (const unsigned* AS = tri_->getSubRegisters(MO.getReg()); *AS; ++AS)
       // If MI also modifies the sub-register explicitly, avoid processing it
       // more than once. Do not pass in TRI here so it checks for exact match.
-      if (!MI->modifiesRegister(*AS))
+      if (!MI->definesRegister(*AS))
         handlePhysicalRegisterDef(MBB, MI, MIIdx, MO,
                                   getOrCreateInterval(*AS), 0);
   }
@@ -631,7 +665,7 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB,
       end = baseIndex.getDefIndex();
       SeenDefUse = true;
       break;
-    } else if (mi->modifiesRegister(interval.reg, tri_)) {
+    } else if (mi->definesRegister(interval.reg, tri_)) {
       // Another instruction redefines the register before it is ever read.
       // Then the register is essentially dead at the instruction that defines
       // it. Hence its interval is:
@@ -1343,7 +1377,8 @@ rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit,
       MI->eraseFromParent();
       continue;
     }
-    assert(!O.isImplicit() && "Spilling register that's used as implicit use?");
+    assert(!(O.isImplicit() && O.isUse()) &&
+           "Spilling register that's used as implicit use?");
     SlotIndex index = getInstructionIndex(MI);
     if (index < start || index >= end)
       continue;
@@ -1605,7 +1640,7 @@ LiveIntervals::getSpillWeight(bool isDef, bool isUse, unsigned loopDepth) {
   // overflow a float. This expression behaves like 10^d for small d, but is
   // more tempered for large d. At d=200 we get 6.7e33 which leaves a bit of
   // headroom before overflow.
-  float lc = powf(1 + (100.0f / (loopDepth+10)), (float)loopDepth);
+  float lc = std::pow(1 + (100.0f / (loopDepth+10)), (float)loopDepth);
 
   return (isDef + isUse) * lc;
 }
diff --git a/lib/CodeGen/LowerSubregs.cpp b/lib/CodeGen/LowerSubregs.cpp
index b4ef648..b0348a5 100644
--- a/lib/CodeGen/LowerSubregs.cpp
+++ b/lib/CodeGen/LowerSubregs.cpp
@@ -140,7 +140,8 @@ bool LowerSubregsInstructionPass::LowerExtract(MachineInstr *MI) {
     // Insert copy
     const TargetRegisterClass *TRCS = TRI->getPhysicalRegisterRegClass(DstReg);
     const TargetRegisterClass *TRCD = TRI->getPhysicalRegisterRegClass(SrcReg);
-    bool Emitted = TII->copyRegToReg(*MBB, MI, DstReg, SrcReg, TRCD, TRCS);
+    bool Emitted = TII->copyRegToReg(*MBB, MI, DstReg, SrcReg, TRCD, TRCS,
+                                     MI->getDebugLoc());
     (void)Emitted;
     assert(Emitted && "Subreg and Dst must be of compatible register class");
     // Transfer the kill/dead flags, if needed.
@@ -193,7 +194,8 @@ bool LowerSubregsInstructionPass::LowerSubregToReg(MachineInstr *MI) {
     // Insert sub-register copy
     const TargetRegisterClass *TRC0= TRI->getPhysicalRegisterRegClass(DstSubReg);
     const TargetRegisterClass *TRC1= TRI->getPhysicalRegisterRegClass(InsReg);
-    bool Emitted = TII->copyRegToReg(*MBB, MI, DstSubReg, InsReg, TRC0, TRC1);
+    bool Emitted = TII->copyRegToReg(*MBB, MI, DstSubReg, InsReg, TRC0, TRC1,
+                                     MI->getDebugLoc());
     (void)Emitted;
     assert(Emitted && "Subreg and Dst must be of compatible register class");
     // Transfer the kill/dead flags, if needed.
@@ -262,7 +264,8 @@ bool LowerSubregsInstructionPass::LowerInsert(MachineInstr *MI) {
       BuildMI(*MBB, MI, MI->getDebugLoc(),
               TII->get(TargetOpcode::KILL), DstSubReg);
     else {
-      bool Emitted = TII->copyRegToReg(*MBB, MI, DstSubReg, InsReg, TRC0, TRC1);
+      bool Emitted = TII->copyRegToReg(*MBB, MI, DstSubReg, InsReg, TRC0, TRC1,
+                                       MI->getDebugLoc());
       (void)Emitted;
       assert(Emitted && "Subreg and Dst must be of compatible register class");
     }
diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp
index 84c3d71..6f4f7a8 100644
--- a/lib/CodeGen/MachineCSE.cpp
+++ b/lib/CodeGen/MachineCSE.cpp
@@ -23,6 +23,7 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/ScopedHashTable.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 
 using namespace llvm;
@@ -30,6 +31,9 @@ using namespace llvm;
 STATISTIC(NumCoalesces, "Number of copies coalesced");
 STATISTIC(NumCSEs,      "Number of common subexpression eliminated");
 
+static cl::opt<bool> CSEPhysDef("machine-cse-phys-defs",
+                                cl::init(false), cl::Hidden);
+
 namespace {
   class MachineCSE : public MachineFunctionPass {
     const TargetInstrInfo *TII;
@@ -39,7 +43,7 @@ namespace {
     MachineRegisterInfo *MRI;
   public:
     static char ID; // Pass identification
-    MachineCSE() : MachineFunctionPass(&ID), CurrVN(0) {}
+    MachineCSE() : MachineFunctionPass(&ID), LookAheadLimit(5), CurrVN(0) {}
 
     virtual bool runOnMachineFunction(MachineFunction &MF);
     
@@ -52,6 +56,7 @@ namespace {
     }
 
   private:
+    const unsigned LookAheadLimit;
     typedef ScopedHashTableScope<MachineInstr*, unsigned,
                                  MachineInstrExpressionTrait> ScopeType;
     DenseMap<MachineBasicBlock*, ScopeType*> ScopeMap;
@@ -62,8 +67,12 @@ namespace {
     bool PerformTrivialCoalescing(MachineInstr *MI, MachineBasicBlock *MBB);
     bool isPhysDefTriviallyDead(unsigned Reg,
                                 MachineBasicBlock::const_iterator I,
-                                MachineBasicBlock::const_iterator E);
-    bool hasLivePhysRegDefUse(MachineInstr *MI, MachineBasicBlock *MBB);
+                                MachineBasicBlock::const_iterator E) const ;
+    bool hasLivePhysRegDefUse(const MachineInstr *MI,
+                              const MachineBasicBlock *MBB,
+                              unsigned &PhysDef) const;
+    bool PhysRegDefReaches(MachineInstr *CSMI, MachineInstr *MI,
+                           unsigned PhysDef) const;
     bool isCSECandidate(MachineInstr *MI);
     bool isProfitableToCSE(unsigned CSReg, unsigned Reg,
                            MachineInstr *CSMI, MachineInstr *MI);
@@ -112,6 +121,7 @@ bool MachineCSE::PerformTrivialCoalescing(MachineInstr *MI,
       DEBUG(dbgs() << "Coalescing: " << *DefMI);
       DEBUG(dbgs() << "*** to: " << *MI);
       MO.setReg(SrcReg);
+      MRI->clearKillFlags(SrcReg);
       if (NewRC != SRC)
         MRI->setRegClass(SrcReg, NewRC);
       DefMI->eraseFromParent();
@@ -123,10 +133,11 @@ bool MachineCSE::PerformTrivialCoalescing(MachineInstr *MI,
   return Changed;
 }
 
-bool MachineCSE::isPhysDefTriviallyDead(unsigned Reg,
-                                        MachineBasicBlock::const_iterator I,
-                                        MachineBasicBlock::const_iterator E) {
-  unsigned LookAheadLeft = 5;
+bool
+MachineCSE::isPhysDefTriviallyDead(unsigned Reg,
+                                   MachineBasicBlock::const_iterator I,
+                                   MachineBasicBlock::const_iterator E) const {
+  unsigned LookAheadLeft = LookAheadLimit;
   while (LookAheadLeft) {
     // Skip over dbg_value's.
     while (I != E && I->isDebugValue())
@@ -144,6 +155,7 @@ bool MachineCSE::isPhysDefTriviallyDead(unsigned Reg,
       if (!TRI->regsOverlap(MO.getReg(), Reg))
         continue;
       if (MO.isUse())
+        // Found a use!
         return false;
       SeenDef = true;
     }
@@ -159,41 +171,73 @@ bool MachineCSE::isPhysDefTriviallyDead(unsigned Reg,
 }
 
 /// hasLivePhysRegDefUse - Return true if the specified instruction read / write
-/// physical registers (except for dead defs of physical registers).
-bool MachineCSE::hasLivePhysRegDefUse(MachineInstr *MI, MachineBasicBlock *MBB){
-  unsigned PhysDef = 0;
+/// physical registers (except for dead defs of physical registers). It also
+/// returns the physical register def by reference if it's the only one.
+bool MachineCSE::hasLivePhysRegDefUse(const MachineInstr *MI,
+                                      const MachineBasicBlock *MBB,
+                                      unsigned &PhysDef) const {
+  PhysDef = 0;
   for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
-    MachineOperand &MO = MI->getOperand(i);
+    const MachineOperand &MO = MI->getOperand(i);
     if (!MO.isReg())
       continue;
     unsigned Reg = MO.getReg();
     if (!Reg)
       continue;
-    if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
-      if (MO.isUse())
-        // Can't touch anything to read a physical register.
-        return true;
-      if (MO.isDead())
-        // If the def is dead, it's ok.
-        continue;
-      // Ok, this is a physical register def that's not marked "dead". That's
-      // common since this pass is run before livevariables. We can scan
-      // forward a few instructions and check if it is obviously dead.
-      if (PhysDef)
-        // Multiple physical register defs. These are rare, forget about it.
-        return true;
-      PhysDef = Reg;
+    if (TargetRegisterInfo::isVirtualRegister(Reg))
+      continue;
+    if (MO.isUse())
+      // Can't touch anything to read a physical register.
+      return true;
+    if (MO.isDead())
+      // If the def is dead, it's ok.
+      continue;
+    // Ok, this is a physical register def that's not marked "dead". That's
+    // common since this pass is run before livevariables. We can scan
+    // forward a few instructions and check if it is obviously dead.
+    if (PhysDef) {
+      // Multiple physical register defs. These are rare, forget about it.
+      PhysDef = 0;
+      return true;
     }
+    PhysDef = Reg;
   }
 
   if (PhysDef) {
-    MachineBasicBlock::iterator I = MI; I = llvm::next(I);
+    MachineBasicBlock::const_iterator I = MI; I = llvm::next(I);
     if (!isPhysDefTriviallyDead(PhysDef, I, MBB->end()))
       return true;
   }
   return false;
 }
 
+bool MachineCSE::PhysRegDefReaches(MachineInstr *CSMI, MachineInstr *MI,
+                                  unsigned PhysDef) const {
+  // For now conservatively returns false if the common subexpression is
+  // not in the same basic block as the given instruction.
+  MachineBasicBlock *MBB = MI->getParent();
+  if (CSMI->getParent() != MBB)
+    return false;
+  MachineBasicBlock::const_iterator I = CSMI; I = llvm::next(I);
+  MachineBasicBlock::const_iterator E = MI;
+  unsigned LookAheadLeft = LookAheadLimit;
+  while (LookAheadLeft) {
+    // Skip over dbg_value's.
+    while (I != E && I->isDebugValue())
+      ++I;
+
+    if (I == E)
+      return true;
+    if (I->modifiesRegister(PhysDef, TRI))
+      return false;
+
+    --LookAheadLeft;
+    ++I;
+  }
+
+  return false;
+}
+
 static bool isCopy(const MachineInstr *MI, const TargetInstrInfo *TII) {
   unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
   return TII->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) ||
@@ -326,9 +370,20 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
 
     // If the instruction defines a physical register and the value *may* be
     // used, then it's not safe to replace it with a common subexpression.
-    if (FoundCSE && hasLivePhysRegDefUse(MI, MBB))
+    unsigned PhysDef = 0;
+    if (FoundCSE && hasLivePhysRegDefUse(MI, MBB, PhysDef)) {
       FoundCSE = false;
 
+      // ... Unless the CS is local and it also defines the physical register
+      // which is not clobbered in between.
+      if (PhysDef && CSEPhysDef) {
+        unsigned CSVN = VNT.lookup(MI);
+        MachineInstr *CSMI = Exps[CSVN];
+        if (PhysRegDefReaches(CSMI, MI, PhysDef))
+          FoundCSE = true;
+      }
+    }
+
     if (!FoundCSE) {
       VNT.insert(MI, CurrVN++);
       Exps.push_back(MI);
@@ -365,8 +420,10 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
 
     // Actually perform the elimination.
     if (DoCSE) {
-      for (unsigned i = 0, e = CSEPairs.size(); i != e; ++i)
+      for (unsigned i = 0, e = CSEPairs.size(); i != e; ++i) {
         MRI->replaceRegWith(CSEPairs[i].first, CSEPairs[i].second);
+        MRI->clearKillFlags(CSEPairs[i].second);
+      }
       MI->eraseFromParent();
       ++NumCSEs;
     } else {
diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp
index 3cf10b3..a38c881 100644
--- a/lib/CodeGen/MachineFunction.cpp
+++ b/lib/CodeGen/MachineFunction.cpp
@@ -398,8 +398,14 @@ void MachineFunction::viewCFGOnly() const
 unsigned MachineFunction::addLiveIn(unsigned PReg,
                                     const TargetRegisterClass *RC) {
   assert(RC->contains(PReg) && "Not the correct regclass!");
-  unsigned VReg = getRegInfo().createVirtualRegister(RC);
-  getRegInfo().addLiveIn(PReg, VReg);
+  MachineRegisterInfo &MRI = getRegInfo();
+  unsigned VReg = MRI.getLiveInVirtReg(PReg);
+  if (VReg) {
+    assert(MRI.getRegClass(VReg) == RC && "Register class mismatch!");
+    return VReg;
+  }
+  VReg = MRI.createVirtualRegister(RC);
+  MRI.addLiveIn(PReg, VReg);
   return VReg;
 }
 
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index 99b5beb..e54cd5c 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -219,8 +219,12 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const {
         OS << "%physreg" << getReg();
     }
 
-    if (getSubReg() != 0)
-      OS << ':' << getSubReg();
+    if (getSubReg() != 0) {
+      if (TM)
+        OS << ':' << TM->getRegisterInfo()->getSubRegIndexName(getSubReg());
+      else
+        OS << ':' << getSubReg();
+    }
 
     if (isDef() || isKill() || isDead() || isImplicit() || isUndef() ||
         isEarlyClobber()) {
@@ -781,25 +785,57 @@ int MachineInstr::findRegisterUseOperandIdx(unsigned Reg, bool isKill,
   }
   return -1;
 }
-  
+
+/// readsWritesVirtualRegister - Return a pair of bools (reads, writes)
+/// indicating if this instruction reads or writes Reg. This also considers
+/// partial defines.
+std::pair<bool,bool>
+MachineInstr::readsWritesVirtualRegister(unsigned Reg,
+                                         SmallVectorImpl<unsigned> *Ops) const {
+  bool PartDef = false; // Partial redefine.
+  bool FullDef = false; // Full define.
+  bool Use = false;
+
+  for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = getOperand(i);
+    if (!MO.isReg() || MO.getReg() != Reg)
+      continue;
+    if (Ops)
+      Ops->push_back(i);
+    if (MO.isUse())
+      Use |= !MO.isUndef();
+    else if (MO.getSubReg())
+      PartDef = true;
+    else
+      FullDef = true;
+  }
+  // A partial redefine uses Reg unless there is also a full define.
+  return std::make_pair(Use || (PartDef && !FullDef), PartDef || FullDef);
+}
+
 /// findRegisterDefOperandIdx() - Returns the operand index that is a def of
 /// the specified register or -1 if it is not found. If isDead is true, defs
 /// that are not dead are skipped. If TargetRegisterInfo is non-null, then it
 /// also checks if there is a def of a super-register.
-int MachineInstr::findRegisterDefOperandIdx(unsigned Reg, bool isDead,
-                                          const TargetRegisterInfo *TRI) const {
+int
+MachineInstr::findRegisterDefOperandIdx(unsigned Reg, bool isDead, bool Overlap,
+                                        const TargetRegisterInfo *TRI) const {
+  bool isPhys = TargetRegisterInfo::isPhysicalRegister(Reg);
   for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
     const MachineOperand &MO = getOperand(i);
     if (!MO.isReg() || !MO.isDef())
       continue;
     unsigned MOReg = MO.getReg();
-    if (MOReg == Reg ||
-        (TRI &&
-         TargetRegisterInfo::isPhysicalRegister(MOReg) &&
-         TargetRegisterInfo::isPhysicalRegister(Reg) &&
-         TRI->isSubRegister(MOReg, Reg)))
-      if (!isDead || MO.isDead())
-        return i;
+    bool Found = (MOReg == Reg);
+    if (!Found && TRI && isPhys &&
+        TargetRegisterInfo::isPhysicalRegister(MOReg)) {
+      if (Overlap)
+        Found = TRI->regsOverlap(MOReg, Reg);
+      else
+        Found = TRI->isSubRegister(MOReg, Reg);
+    }
+    if (Found && (!isDead || MO.isDead()))
+      return i;
   }
   return -1;
 }
@@ -938,6 +974,16 @@ isRegTiedToDefOperand(unsigned UseOpIdx, unsigned *DefOpIdx) const {
   return true;
 }
 
+/// clearKillInfo - Clears kill flags on all operands.
+///
+void MachineInstr::clearKillInfo() {
+  for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = getOperand(i);
+    if (MO.isReg() && MO.isUse())
+      MO.setIsKill(false);
+  }
+}
+
 /// copyKillDeadInfo - Copies kill / dead operand properties from MI.
 ///
 void MachineInstr::copyKillDeadInfo(const MachineInstr *MI) {
@@ -1355,11 +1401,21 @@ bool MachineInstr::addRegisterDead(unsigned IncomingReg,
 
 void MachineInstr::addRegisterDefined(unsigned IncomingReg,
                                       const TargetRegisterInfo *RegInfo) {
-  MachineOperand *MO = findRegisterDefOperand(IncomingReg, false, RegInfo);
-  if (!MO || MO->getSubReg())
-    addOperand(MachineOperand::CreateReg(IncomingReg,
-                                         true  /*IsDef*/,
-                                         true  /*IsImp*/));
+  if (TargetRegisterInfo::isPhysicalRegister(IncomingReg)) {
+    MachineOperand *MO = findRegisterDefOperand(IncomingReg, false, RegInfo);
+    if (MO)
+      return;
+  } else {
+    for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+      const MachineOperand &MO = getOperand(i);
+      if (MO.isReg() && MO.getReg() == IncomingReg && MO.isDef() &&
+          MO.getSubReg() == 0)
+        return;
+    }
+  }
+  addOperand(MachineOperand::CreateReg(IncomingReg,
+                                       true  /*IsDef*/,
+                                       true  /*IsImp*/));
 }
 
 unsigned
diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp
index b2e757d..6120617 100644
--- a/lib/CodeGen/MachineLICM.cpp
+++ b/lib/CodeGen/MachineLICM.cpp
@@ -738,8 +738,10 @@ bool MachineLICM::EliminateCSE(MachineInstr *MI,
              "Instructions with different phys regs are not identical!");
 
       if (MO.isReg() && MO.isDef() &&
-          !TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
+          !TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
         RegInfo->replaceRegWith(MO.getReg(), Dup->getOperand(i).getReg());
+        RegInfo->clearKillFlags(Dup->getOperand(i).getReg());
+      }
     }
     MI->eraseFromParent();
     ++NumCSEed;
@@ -784,6 +786,15 @@ void MachineLICM::Hoist(MachineInstr *MI) {
     // Otherwise, splice the instruction to the preheader.
     CurPreheader->splice(CurPreheader->getFirstTerminator(),MI->getParent(),MI);
 
+    // Clear the kill flags of any register this instruction defines,
+    // since they may need to be live throughout the entire loop
+    // rather than just live for part of it.
+    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+      MachineOperand &MO = MI->getOperand(i);
+      if (MO.isReg() && MO.isDef() && !MO.isDead())
+        RegInfo->clearKillFlags(MO.getReg());
+    }
+
     // Add to the CSE map.
     if (CI != CSEMap.end())
       CI->second.push_back(MI);
diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp
index ea5ca0c..70bf7e5 100644
--- a/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/lib/CodeGen/MachineRegisterInfo.cpp
@@ -133,6 +133,15 @@ bool MachineRegisterInfo::hasOneNonDBGUse(unsigned RegNo) const {
   return ++UI == use_nodbg_end();
 }
 
+/// clearKillFlags - Iterate over all the uses of the given register and
+/// clear the kill flag from the MachineOperand. This function is used by
+/// optimization passes which extend register lifetimes and need only
+/// preserve conservative kill flag information.
+void MachineRegisterInfo::clearKillFlags(unsigned Reg) const {
+  for (use_iterator UI = use_begin(Reg), UE = use_end(); UI != UE; ++UI)
+    UI.getOperand().setIsKill(false);
+}
+
 bool MachineRegisterInfo::isLiveIn(unsigned Reg) const {
   for (livein_iterator I = livein_begin(), E = livein_end(); I != E; ++I)
     if (I->first == Reg || I->second == Reg)
@@ -156,6 +165,15 @@ unsigned MachineRegisterInfo::getLiveInPhysReg(unsigned VReg) const {
   return 0;
 }
 
+/// getLiveInVirtReg - If PReg is a live-in physical register, return the
+/// corresponding live-in physical register.
+unsigned MachineRegisterInfo::getLiveInVirtReg(unsigned PReg) const {
+  for (livein_iterator I = livein_begin(), E = livein_end(); I != E; ++I)
+    if (I->first == PReg)
+      return I->second;
+  return 0;
+}
+
 static cl::opt<bool>
 SchedLiveInCopies("schedule-livein-copies", cl::Hidden,
                   cl::desc("Schedule copies of livein registers"),
@@ -218,7 +236,8 @@ static void EmitLiveInCopy(MachineBasicBlock *MBB,
     --Pos;
   }
 
-  bool Emitted = TII.copyRegToReg(*MBB, Pos, VirtReg, PhysReg, RC, RC);
+  bool Emitted = TII.copyRegToReg(*MBB, Pos, VirtReg, PhysReg, RC, RC,
+                                  DebugLoc());
   assert(Emitted && "Unable to issue a live-in copy instruction!\n");
   (void) Emitted;
 
@@ -253,7 +272,8 @@ MachineRegisterInfo::EmitLiveInCopies(MachineBasicBlock *EntryMBB,
       if (LI->second) {
         const TargetRegisterClass *RC = getRegClass(LI->second);
         bool Emitted = TII.copyRegToReg(*EntryMBB, EntryMBB->begin(),
-                                        LI->second, LI->first, RC, RC);
+                                        LI->second, LI->first, RC, RC,
+                                        DebugLoc());
         assert(Emitted && "Unable to issue a live-in copy instruction!\n");
         (void) Emitted;
       }
@@ -265,6 +285,15 @@ MachineRegisterInfo::EmitLiveInCopies(MachineBasicBlock *EntryMBB,
     EntryMBB->addLiveIn(I->first);
 }
 
+void MachineRegisterInfo::closePhysRegsUsed(const TargetRegisterInfo &TRI) {
+  for (int i = UsedPhysRegs.find_first(); i >= 0;
+       i = UsedPhysRegs.find_next(i))
+         for (const unsigned *SS = TRI.getSubRegisters(i);
+              unsigned SubReg = *SS; ++SS)
+           if (SubReg > unsigned(i))
+             UsedPhysRegs.set(SubReg);
+}
+
 #ifndef NDEBUG
 void MachineRegisterInfo::dumpUses(unsigned Reg) const {
   for (use_iterator I = use_begin(Reg), E = use_end(); I != E; ++I)
diff --git a/lib/CodeGen/MachineSSAUpdater.cpp b/lib/CodeGen/MachineSSAUpdater.cpp
index b8996d4..84d6df2 100644
--- a/lib/CodeGen/MachineSSAUpdater.cpp
+++ b/lib/CodeGen/MachineSSAUpdater.cpp
@@ -26,39 +26,17 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/SSAUpdaterImpl.h"
 using namespace llvm;
 
-/// BBInfo - Per-basic block information used internally by MachineSSAUpdater.
-class MachineSSAUpdater::BBInfo {
-public:
-  MachineBasicBlock *BB; // Back-pointer to the corresponding block.
-  unsigned AvailableVal; // Value to use in this block.
-  BBInfo *DefBB;         // Block that defines the available value.
-  int BlkNum;            // Postorder number.
-  BBInfo *IDom;          // Immediate dominator.
-  unsigned NumPreds;     // Number of predecessor blocks.
-  BBInfo **Preds;        // Array[NumPreds] of predecessor blocks.
-  MachineInstr *PHITag;  // Marker for existing PHIs that match.
-
-  BBInfo(MachineBasicBlock *ThisBB, unsigned V)
-    : BB(ThisBB), AvailableVal(V), DefBB(V ? this : 0), BlkNum(0), IDom(0),
-      NumPreds(0), Preds(0), PHITag(0) { }
-};
-
-typedef DenseMap<MachineBasicBlock*, MachineSSAUpdater::BBInfo*> BBMapTy;
-
 typedef DenseMap<MachineBasicBlock*, unsigned> AvailableValsTy;
 static AvailableValsTy &getAvailableVals(void *AV) {
   return *static_cast<AvailableValsTy*>(AV);
 }
 
-static BBMapTy *getBBMap(void *BM) {
-  return static_cast<BBMapTy*>(BM);
-}
-
 MachineSSAUpdater::MachineSSAUpdater(MachineFunction &MF,
                                      SmallVectorImpl<MachineInstr*> *NewPHI)
-  : AV(0), BM(0), InsertedPHIs(NewPHI) {
+  : AV(0), InsertedPHIs(NewPHI) {
   TII = MF.getTarget().getInstrInfo();
   MRI = &MF.getRegInfo();
 }
@@ -134,7 +112,8 @@ static
 MachineInstr *InsertNewDef(unsigned Opcode,
                            MachineBasicBlock *BB, MachineBasicBlock::iterator I,
                            const TargetRegisterClass *RC,
-                           MachineRegisterInfo *MRI, const TargetInstrInfo *TII) {
+                           MachineRegisterInfo *MRI,
+                           const TargetInstrInfo *TII) {
   unsigned NewVR = MRI->createVirtualRegister(RC);
   return BuildMI(*BB, I, DebugLoc(), TII->get(Opcode), NewVR);
 }
@@ -263,438 +242,131 @@ void MachineSSAUpdater::ReplaceRegWith(unsigned OldReg, unsigned NewReg) {
       I->second = NewReg;
 }
 
-/// GetValueAtEndOfBlockInternal - Check to see if AvailableVals has an entry
-/// for the specified BB and if so, return it.  If not, construct SSA form by
-/// first calculating the required placement of PHIs and then inserting new
-/// PHIs where needed.
-unsigned MachineSSAUpdater::GetValueAtEndOfBlockInternal(MachineBasicBlock *BB){
-  AvailableValsTy &AvailableVals = getAvailableVals(AV);
-  if (unsigned V = AvailableVals[BB])
-    return V;
+/// MachinePHIiter - Iterator for PHI operands.  This is used for the
+/// PHI_iterator in the SSAUpdaterImpl template.
+namespace {
+  class MachinePHIiter {
+  private:
+    MachineInstr *PHI;
+    unsigned idx;
+ 
+  public:
+    explicit MachinePHIiter(MachineInstr *P) // begin iterator
+      : PHI(P), idx(1) {}
+    MachinePHIiter(MachineInstr *P, bool) // end iterator
+      : PHI(P), idx(PHI->getNumOperands()) {}
+
+    MachinePHIiter &operator++() { idx += 2; return *this; } 
+    bool operator==(const MachinePHIiter& x) const { return idx == x.idx; }
+    bool operator!=(const MachinePHIiter& x) const { return !operator==(x); }
+    unsigned getIncomingValue() { return PHI->getOperand(idx).getReg(); }
+    MachineBasicBlock *getIncomingBlock() {
+      return PHI->getOperand(idx+1).getMBB();
+    }
+  };
+}
 
-  // Pool allocation used internally by GetValueAtEndOfBlock.
-  BumpPtrAllocator Allocator;
-  BBMapTy BBMapObj;
-  BM = &BBMapObj;
+/// SSAUpdaterTraits<MachineSSAUpdater> - Traits for the SSAUpdaterImpl
+/// template, specialized for MachineSSAUpdater.
+namespace llvm {
+template<>
+class SSAUpdaterTraits<MachineSSAUpdater> {
+public:
+  typedef MachineBasicBlock BlkT;
+  typedef unsigned ValT;
+  typedef MachineInstr PhiT;
+
+  typedef MachineBasicBlock::succ_iterator BlkSucc_iterator;
+  static BlkSucc_iterator BlkSucc_begin(BlkT *BB) { return BB->succ_begin(); }
+  static BlkSucc_iterator BlkSucc_end(BlkT *BB) { return BB->succ_end(); }
+
+  typedef MachinePHIiter PHI_iterator;
+  static inline PHI_iterator PHI_begin(PhiT *PHI) { return PHI_iterator(PHI); }
+  static inline PHI_iterator PHI_end(PhiT *PHI) {
+    return PHI_iterator(PHI, true);
+  }
 
-  SmallVector<BBInfo*, 100> BlockList;
-  BuildBlockList(BB, &BlockList, &Allocator);
+  /// FindPredecessorBlocks - Put the predecessors of BB into the Preds
+  /// vector.
+  static void FindPredecessorBlocks(MachineBasicBlock *BB,
+                                    SmallVectorImpl<MachineBasicBlock*> *Preds){
+    for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(),
+           E = BB->pred_end(); PI != E; ++PI)
+      Preds->push_back(*PI);
+  }
 
-  // Special case: bail out if BB is unreachable.
-  if (BlockList.size() == 0) {
-    BM = 0;
+  /// GetUndefVal - Create an IMPLICIT_DEF instruction with a new register.
+  /// Add it into the specified block and return the register.
+  static unsigned GetUndefVal(MachineBasicBlock *BB,
+                              MachineSSAUpdater *Updater) {
     // Insert an implicit_def to represent an undef value.
     MachineInstr *NewDef = InsertNewDef(TargetOpcode::IMPLICIT_DEF,
                                         BB, BB->getFirstTerminator(),
-                                        VRC, MRI, TII);
-    unsigned V = NewDef->getOperand(0).getReg();
-    AvailableVals[BB] = V;
-    return V;
-  }
-
-  FindDominators(&BlockList);
-  FindPHIPlacement(&BlockList);
-  FindAvailableVals(&BlockList);
-
-  BM = 0;
-  return BBMapObj[BB]->DefBB->AvailableVal;
-}
-
-/// FindPredecessorBlocks - Put the predecessors of Info->BB into the Preds
-/// vector, set Info->NumPreds, and allocate space in Info->Preds.
-static void FindPredecessorBlocks(MachineSSAUpdater::BBInfo *Info,
-                                  SmallVectorImpl<MachineBasicBlock*> *Preds,
-                                  BumpPtrAllocator *Allocator) {
-  MachineBasicBlock *BB = Info->BB;
-  for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(),
-         E = BB->pred_end(); PI != E; ++PI)
-    Preds->push_back(*PI);
-
-  Info->NumPreds = Preds->size();
-  Info->Preds = static_cast<MachineSSAUpdater::BBInfo**>
-    (Allocator->Allocate(Info->NumPreds * sizeof(MachineSSAUpdater::BBInfo*),
-                         AlignOf<MachineSSAUpdater::BBInfo*>::Alignment));
-}
-
-/// BuildBlockList - Starting from the specified basic block, traverse back
-/// through its predecessors until reaching blocks with known values.  Create
-/// BBInfo structures for the blocks and append them to the block list.
-void MachineSSAUpdater::BuildBlockList(MachineBasicBlock *BB,
-                                       BlockListTy *BlockList,
-                                       BumpPtrAllocator *Allocator) {
-  AvailableValsTy &AvailableVals = getAvailableVals(AV);
-  BBMapTy *BBMap = getBBMap(BM);
-  SmallVector<BBInfo*, 10> RootList;
-  SmallVector<BBInfo*, 64> WorkList;
-
-  BBInfo *Info = new (*Allocator) BBInfo(BB, 0);
-  (*BBMap)[BB] = Info;
-  WorkList.push_back(Info);
-
-  // Search backward from BB, creating BBInfos along the way and stopping when
-  // reaching blocks that define the value.  Record those defining blocks on
-  // the RootList.
-  SmallVector<MachineBasicBlock*, 10> Preds;
-  while (!WorkList.empty()) {
-    Info = WorkList.pop_back_val();
-    Preds.clear();
-    FindPredecessorBlocks(Info, &Preds, Allocator);
-
-    // Treat an unreachable predecessor as a definition with 'undef'.
-    if (Info->NumPreds == 0) {
-      // Insert an implicit_def to represent an undef value.
-      MachineInstr *NewDef = InsertNewDef(TargetOpcode::IMPLICIT_DEF,
-                                          Info->BB,
-                                          Info->BB->getFirstTerminator(),
-                                          VRC, MRI, TII);
-      Info->AvailableVal = NewDef->getOperand(0).getReg();
-      Info->DefBB = Info;
-      RootList.push_back(Info);
-      continue;
-    }
-
-    for (unsigned p = 0; p != Info->NumPreds; ++p) {
-      MachineBasicBlock *Pred = Preds[p];
-      // Check if BBMap already has a BBInfo for the predecessor block.
-      BBMapTy::value_type &BBMapBucket = BBMap->FindAndConstruct(Pred);
-      if (BBMapBucket.second) {
-        Info->Preds[p] = BBMapBucket.second;
-        continue;
-      }
-
-      // Create a new BBInfo for the predecessor.
-      unsigned PredVal = AvailableVals.lookup(Pred);
-      BBInfo *PredInfo = new (*Allocator) BBInfo(Pred, PredVal);
-      BBMapBucket.second = PredInfo;
-      Info->Preds[p] = PredInfo;
-
-      if (PredInfo->AvailableVal) {
-        RootList.push_back(PredInfo);
-        continue;
-      }
-      WorkList.push_back(PredInfo);
-    }
+                                        Updater->VRC, Updater->MRI,
+                                        Updater->TII);
+    return NewDef->getOperand(0).getReg();
   }
 
-  // Now that we know what blocks are backwards-reachable from the starting
-  // block, do a forward depth-first traversal to assign postorder numbers
-  // to those blocks.
-  BBInfo *PseudoEntry = new (*Allocator) BBInfo(0, 0);
-  unsigned BlkNum = 1;
-
-  // Initialize the worklist with the roots from the backward traversal.
-  while (!RootList.empty()) {
-    Info = RootList.pop_back_val();
-    Info->IDom = PseudoEntry;
-    Info->BlkNum = -1;
-    WorkList.push_back(Info);
+  /// CreateEmptyPHI - Create a PHI instruction that defines a new register.
+  /// Add it into the specified block and return the register.
+  static unsigned CreateEmptyPHI(MachineBasicBlock *BB, unsigned NumPreds,
+                                 MachineSSAUpdater *Updater) {
+    MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->front();
+    MachineInstr *PHI = InsertNewDef(TargetOpcode::PHI, BB, Loc,
+                                     Updater->VRC, Updater->MRI,
+                                     Updater->TII);
+    return PHI->getOperand(0).getReg();
   }
 
-  while (!WorkList.empty()) {
-    Info = WorkList.back();
-
-    if (Info->BlkNum == -2) {
-      // All the successors have been handled; assign the postorder number.
-      Info->BlkNum = BlkNum++;
-      // If not a root, put it on the BlockList.
-      if (!Info->AvailableVal)
-        BlockList->push_back(Info);
-      WorkList.pop_back();
-      continue;
-    }
-
-    // Leave this entry on the worklist, but set its BlkNum to mark that its
-    // successors have been put on the worklist.  When it returns to the top
-    // the list, after handling its successors, it will be assigned a number.
-    Info->BlkNum = -2;
-
-    // Add unvisited successors to the work list.
-    for (MachineBasicBlock::succ_iterator SI = Info->BB->succ_begin(),
-           E = Info->BB->succ_end(); SI != E; ++SI) {
-      BBInfo *SuccInfo = (*BBMap)[*SI];
-      if (!SuccInfo || SuccInfo->BlkNum)
-        continue;
-      SuccInfo->BlkNum = -1;
-      WorkList.push_back(SuccInfo);
-    }
+  /// AddPHIOperand - Add the specified value as an operand of the PHI for
+  /// the specified predecessor block.
+  static void AddPHIOperand(MachineInstr *PHI, unsigned Val,
+                            MachineBasicBlock *Pred) {
+    PHI->addOperand(MachineOperand::CreateReg(Val, false));
+    PHI->addOperand(MachineOperand::CreateMBB(Pred));
   }
-  PseudoEntry->BlkNum = BlkNum;
-}
 
-/// IntersectDominators - This is the dataflow lattice "meet" operation for
-/// finding dominators.  Given two basic blocks, it walks up the dominator
-/// tree until it finds a common dominator of both.  It uses the postorder
-/// number of the blocks to determine how to do that.
-static MachineSSAUpdater::BBInfo *
-IntersectDominators(MachineSSAUpdater::BBInfo *Blk1,
-                    MachineSSAUpdater::BBInfo *Blk2) {
-  while (Blk1 != Blk2) {
-    while (Blk1->BlkNum < Blk2->BlkNum) {
-      Blk1 = Blk1->IDom;
-      if (!Blk1)
-        return Blk2;
-    }
-    while (Blk2->BlkNum < Blk1->BlkNum) {
-      Blk2 = Blk2->IDom;
-      if (!Blk2)
-        return Blk1;
-    }
-  }
-  return Blk1;
-}
-
-/// FindDominators - Calculate the dominator tree for the subset of the CFG
-/// corresponding to the basic blocks on the BlockList.  This uses the
-/// algorithm from: "A Simple, Fast Dominance Algorithm" by Cooper, Harvey and
-/// Kennedy, published in Software--Practice and Experience, 2001, 4:1-10.
-/// Because the CFG subset does not include any edges leading into blocks that
-/// define the value, the results are not the usual dominator tree.  The CFG
-/// subset has a single pseudo-entry node with edges to a set of root nodes
-/// for blocks that define the value.  The dominators for this subset CFG are
-/// not the standard dominators but they are adequate for placing PHIs within
-/// the subset CFG.
-void MachineSSAUpdater::FindDominators(BlockListTy *BlockList) {
-  bool Changed;
-  do {
-    Changed = false;
-    // Iterate over the list in reverse order, i.e., forward on CFG edges.
-    for (BlockListTy::reverse_iterator I = BlockList->rbegin(),
-           E = BlockList->rend(); I != E; ++I) {
-      BBInfo *Info = *I;
-
-      // Start with the first predecessor.
-      assert(Info->NumPreds > 0 && "unreachable block");
-      BBInfo *NewIDom = Info->Preds[0];
-
-      // Iterate through the block's other predecessors.
-      for (unsigned p = 1; p != Info->NumPreds; ++p) {
-        BBInfo *Pred = Info->Preds[p];
-        NewIDom = IntersectDominators(NewIDom, Pred);
-      }
-
-      // Check if the IDom value has changed.
-      if (NewIDom != Info->IDom) {
-        Info->IDom = NewIDom;
-        Changed = true;
-      }
-    }
-  } while (Changed);
-}
-
-/// IsDefInDomFrontier - Search up the dominator tree from Pred to IDom for
-/// any blocks containing definitions of the value.  If one is found, then the
-/// successor of Pred is in the dominance frontier for the definition, and
-/// this function returns true.
-static bool IsDefInDomFrontier(const MachineSSAUpdater::BBInfo *Pred,
-                               const MachineSSAUpdater::BBInfo *IDom) {
-  for (; Pred != IDom; Pred = Pred->IDom) {
-    if (Pred->DefBB == Pred)
-      return true;
+  /// InstrIsPHI - Check if an instruction is a PHI.
+  ///
+  static MachineInstr *InstrIsPHI(MachineInstr *I) {
+    if (I && I->isPHI())
+      return I;
+    return 0;
   }
-  return false;
-}
-
-/// FindPHIPlacement - PHIs are needed in the iterated dominance frontiers of
-/// the known definitions.  Iteratively add PHIs in the dom frontiers until
-/// nothing changes.  Along the way, keep track of the nearest dominating
-/// definitions for non-PHI blocks.
-void MachineSSAUpdater::FindPHIPlacement(BlockListTy *BlockList) {
-  bool Changed;
-  do {
-    Changed = false;
-    // Iterate over the list in reverse order, i.e., forward on CFG edges.
-    for (BlockListTy::reverse_iterator I = BlockList->rbegin(),
-           E = BlockList->rend(); I != E; ++I) {
-      BBInfo *Info = *I;
-
-      // If this block already needs a PHI, there is nothing to do here.
-      if (Info->DefBB == Info)
-        continue;
-
-      // Default to use the same def as the immediate dominator.
-      BBInfo *NewDefBB = Info->IDom->DefBB;
-      for (unsigned p = 0; p != Info->NumPreds; ++p) {
-        if (IsDefInDomFrontier(Info->Preds[p], Info->IDom)) {
-          // Need a PHI here.
-          NewDefBB = Info;
-          break;
-        }
-      }
 
-      // Check if anything changed.
-      if (NewDefBB != Info->DefBB) {
-        Info->DefBB = NewDefBB;
-        Changed = true;
-      }
-    }
-  } while (Changed);
-}
-
-/// FindAvailableVal - If this block requires a PHI, first check if an existing
-/// PHI matches the PHI placement and reaching definitions computed earlier,
-/// and if not, create a new PHI.  Visit all the block's predecessors to
-/// calculate the available value for each one and fill in the incoming values
-/// for a new PHI.
-void MachineSSAUpdater::FindAvailableVals(BlockListTy *BlockList) {
-  AvailableValsTy &AvailableVals = getAvailableVals(AV);
-
-  // Go through the worklist in forward order (i.e., backward through the CFG)
-  // and check if existing PHIs can be used.  If not, create empty PHIs where
-  // they are needed.
-  for (BlockListTy::iterator I = BlockList->begin(), E = BlockList->end();
-       I != E; ++I) {
-    BBInfo *Info = *I;
-    // Check if there needs to be a PHI in BB.
-    if (Info->DefBB != Info)
-      continue;
-
-    // Look for an existing PHI.
-    FindExistingPHI(Info->BB, BlockList);
-    if (Info->AvailableVal)
-      continue;
-
-    MachineBasicBlock::iterator Loc =
-      Info->BB->empty() ? Info->BB->end() : Info->BB->front();
-    MachineInstr *InsertedPHI = InsertNewDef(TargetOpcode::PHI, Info->BB, Loc,
-                                             VRC, MRI, TII);
-    unsigned PHI = InsertedPHI->getOperand(0).getReg();
-    Info->AvailableVal = PHI;
-    AvailableVals[Info->BB] = PHI;
+  /// ValueIsPHI - Check if the instruction that defines the specified register
+  /// is a PHI instruction.
+  static MachineInstr *ValueIsPHI(unsigned Val, MachineSSAUpdater *Updater) {
+    return InstrIsPHI(Updater->MRI->getVRegDef(Val));
   }
 
-  // Now go back through the worklist in reverse order to fill in the arguments
-  // for any new PHIs added in the forward traversal.
-  for (BlockListTy::reverse_iterator I = BlockList->rbegin(),
-         E = BlockList->rend(); I != E; ++I) {
-    BBInfo *Info = *I;
-
-    if (Info->DefBB != Info) {
-      // Record the available value at join nodes to speed up subsequent
-      // uses of this SSAUpdater for the same value.
-      if (Info->NumPreds > 1)
-        AvailableVals[Info->BB] = Info->DefBB->AvailableVal;
-      continue;
-    }
-
-    // Check if this block contains a newly added PHI.
-    unsigned PHI = Info->AvailableVal;
-    MachineInstr *InsertedPHI = MRI->getVRegDef(PHI);
-    if (!InsertedPHI->isPHI() || InsertedPHI->getNumOperands() > 1)
-      continue;
-
-    // Iterate through the block's predecessors.
-    MachineInstrBuilder MIB(InsertedPHI);
-    for (unsigned p = 0; p != Info->NumPreds; ++p) {
-      BBInfo *PredInfo = Info->Preds[p];
-      MachineBasicBlock *Pred = PredInfo->BB;
-      // Skip to the nearest preceding definition.
-      if (PredInfo->DefBB != PredInfo)
-        PredInfo = PredInfo->DefBB;
-      MIB.addReg(PredInfo->AvailableVal).addMBB(Pred);
-    }
-
-    DEBUG(dbgs() << "  Inserted PHI: " << *InsertedPHI << "\n");
-
-    // If the client wants to know about all new instructions, tell it.
-    if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI);
+  /// ValueIsNewPHI - Like ValueIsPHI but also check if the PHI has no source
+  /// operands, i.e., it was just added.
+  static MachineInstr *ValueIsNewPHI(unsigned Val, MachineSSAUpdater *Updater) {
+    MachineInstr *PHI = ValueIsPHI(Val, Updater);
+    if (PHI && PHI->getNumOperands() <= 1)
+      return PHI;
+    return 0;
   }
-}
 
-/// FindExistingPHI - Look through the PHI nodes in a block to see if any of
-/// them match what is needed.
-void MachineSSAUpdater::FindExistingPHI(MachineBasicBlock *BB,
-                                        BlockListTy *BlockList) {
-  for (MachineBasicBlock::iterator BBI = BB->begin(), BBE = BB->end();
-       BBI != BBE && BBI->isPHI(); ++BBI) {
-    if (CheckIfPHIMatches(BBI)) {
-      RecordMatchingPHI(BBI);
-      break;
-    }
-    // Match failed: clear all the PHITag values.
-    for (BlockListTy::iterator I = BlockList->begin(), E = BlockList->end();
-         I != E; ++I)
-      (*I)->PHITag = 0;
+  /// GetPHIValue - For the specified PHI instruction, return the register
+  /// that it defines.
+  static unsigned GetPHIValue(MachineInstr *PHI) {
+    return PHI->getOperand(0).getReg();
   }
-}
-
-/// CheckIfPHIMatches - Check if a PHI node matches the placement and values
-/// in the BBMap.
-bool MachineSSAUpdater::CheckIfPHIMatches(MachineInstr *PHI) {
-  BBMapTy *BBMap = getBBMap(BM);
-  SmallVector<MachineInstr*, 20> WorkList;
-  WorkList.push_back(PHI);
-
-  // Mark that the block containing this PHI has been visited.
-  (*BBMap)[PHI->getParent()]->PHITag = PHI;
-
-  while (!WorkList.empty()) {
-    PHI = WorkList.pop_back_val();
-
-    // Iterate through the PHI's incoming values.
-    for (unsigned i = 1, e = PHI->getNumOperands(); i != e; i += 2) {
-      unsigned IncomingVal = PHI->getOperand(i).getReg();
-      BBInfo *PredInfo = (*BBMap)[PHI->getOperand(i+1).getMBB()];
-      // Skip to the nearest preceding definition.
-      if (PredInfo->DefBB != PredInfo)
-        PredInfo = PredInfo->DefBB;
-
-      // Check if it matches the expected value.
-      if (PredInfo->AvailableVal) {
-        if (IncomingVal == PredInfo->AvailableVal)
-          continue;
-        return false;
-      }
-
-      // Check if the value is a PHI in the correct block.
-      MachineInstr *IncomingPHIVal = MRI->getVRegDef(IncomingVal);
-      if (!IncomingPHIVal->isPHI() ||
-          IncomingPHIVal->getParent() != PredInfo->BB)
-        return false;
-
-      // If this block has already been visited, check if this PHI matches.
-      if (PredInfo->PHITag) {
-        if (IncomingPHIVal == PredInfo->PHITag)
-          continue;
-        return false;
-      }
-      PredInfo->PHITag = IncomingPHIVal;
+};
 
-      WorkList.push_back(IncomingPHIVal);
-    }
-  }
-  return true;
-}
+} // End llvm namespace
 
-/// RecordMatchingPHI - For a PHI node that matches, record it and its input
-/// PHIs in both the BBMap and the AvailableVals mapping.
-void MachineSSAUpdater::RecordMatchingPHI(MachineInstr *PHI) {
-  BBMapTy *BBMap = getBBMap(BM);
+/// GetValueAtEndOfBlockInternal - Check to see if AvailableVals has an entry
+/// for the specified BB and if so, return it.  If not, construct SSA form by
+/// first calculating the required placement of PHIs and then inserting new
+/// PHIs where needed.
+unsigned MachineSSAUpdater::GetValueAtEndOfBlockInternal(MachineBasicBlock *BB){
   AvailableValsTy &AvailableVals = getAvailableVals(AV);
-  SmallVector<MachineInstr*, 20> WorkList;
-  WorkList.push_back(PHI);
-
-  // Record this PHI.
-  MachineBasicBlock *BB = PHI->getParent();
-  AvailableVals[BB] = PHI->getOperand(0).getReg();
-  (*BBMap)[BB]->AvailableVal = PHI->getOperand(0).getReg();
-
-  while (!WorkList.empty()) {
-    PHI = WorkList.pop_back_val();
-
-    // Iterate through the PHI's incoming values.
-    for (unsigned i = 1, e = PHI->getNumOperands(); i != e; i += 2) {
-      unsigned IncomingVal = PHI->getOperand(i).getReg();
-      MachineInstr *IncomingPHIVal = MRI->getVRegDef(IncomingVal);
-      if (!IncomingPHIVal->isPHI()) continue;
-      BB = IncomingPHIVal->getParent();
-      BBInfo *Info = (*BBMap)[BB];
-      if (!Info || Info->AvailableVal)
-        continue;
-
-      // Record the PHI and add it to the worklist.
-      AvailableVals[BB] = IncomingVal;
-      Info->AvailableVal = IncomingVal;
-      WorkList.push_back(IncomingPHIVal);
-    }
-  }
+  if (unsigned V = AvailableVals[BB])
+    return V;
+
+  SSAUpdaterImpl<MachineSSAUpdater> Impl(this, &AvailableVals, InsertedPHIs);
+  return Impl.GetValue(BB);
 }
diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp
index ef489dc..1610e6c 100644
--- a/lib/CodeGen/MachineSink.cpp
+++ b/lib/CodeGen/MachineSink.cpp
@@ -314,5 +314,10 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
   // Move the instruction.
   SuccToSinkTo->splice(InsertPos, ParentBlock, MI,
                        ++MachineBasicBlock::iterator(MI));
+
+  // Conservatively, clear any kill flags, since it's possible that
+  // they are no longer correct.
+  MI->clearKillInfo();
+
   return true;
 }
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
index 0b75c55..8baf01c 100644
--- a/lib/CodeGen/MachineVerifier.cpp
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -47,7 +47,7 @@ namespace {
     MachineVerifier(Pass *pass, bool allowDoubleDefs) :
       PASS(pass),
       allowVirtDoubleDefs(allowDoubleDefs),
-      allowPhysDoubleDefs(allowDoubleDefs),
+      allowPhysDoubleDefs(true),
       OutFileName(getenv("LLVM_VERIFY_MACHINEINSTRS"))
       {}
 
@@ -552,19 +552,23 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
       regsLiveInButUnused.erase(Reg);
 
       bool isKill = false;
-      if (MO->isKill()) {
-        isKill = true;
-        // Tied operands on two-address instuctions MUST NOT have a <kill> flag.
-        if (MI->isRegTiedToDefOperand(MONum))
+      unsigned defIdx;
+      if (MI->isRegTiedToDefOperand(MONum, &defIdx)) {
+        // A two-addr use counts as a kill if use and def are the same.
+        unsigned DefReg = MI->getOperand(defIdx).getReg();
+        if (Reg == DefReg) {
+          isKill = true;
+          // ANd in that case an explicit kill flag is not allowed.
+          if (MO->isKill())
             report("Illegal kill flag on two-address instruction operand",
                    MO, MONum);
-      } else {
-        // TwoAddress instr modifying a reg is treated as kill+def.
-        unsigned defIdx;
-        if (MI->isRegTiedToDefOperand(MONum, &defIdx) &&
-            MI->getOperand(defIdx).getReg() == Reg)
-          isKill = true;
-      }
+        } else if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+          report("Two-address instruction operands must be identical",
+                 MO, MONum);
+        }
+      } else
+        isKill = MO->isKill();
+
       if (isKill) {
         addRegWithSubRegs(regsKilled, Reg);
 
@@ -631,11 +635,14 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
         // Virtual register.
         const TargetRegisterClass *RC = MRI->getRegClass(Reg);
         if (SubIdx) {
-          if (RC->subregclasses_begin()+SubIdx >= RC->subregclasses_end()) {
+          const TargetRegisterClass *SRC = RC->getSubRegisterRegClass(SubIdx);
+          if (!SRC) {
             report("Invalid subregister index for virtual register", MO, MONum);
+            *OS << "Register class " << RC->getName()
+                << " does not support subreg index " << SubIdx << "\n";
             return;
           }
-          RC = *(RC->subregclasses_begin()+SubIdx);
+          RC = SRC;
         }
         if (const TargetRegisterClass *DRC = TOI.getRegClass(TRI)) {
           if (RC != DRC && !RC->hasSuperClass(DRC)) {
diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp
index 1651719..edbc13f 100644
--- a/lib/CodeGen/PHIElimination.cpp
+++ b/lib/CodeGen/PHIElimination.cpp
@@ -27,10 +27,8 @@
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/Statistic.h"
-#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
 #include <algorithm>
 #include <map>
 using namespace llvm;
@@ -88,10 +86,6 @@ bool llvm::PHIElimination::runOnMachineFunction(MachineFunction &MF) {
   ImpDefs.clear();
   VRegPHIUseCount.clear();
 
-  // Eliminate REG_SEQUENCE instructions. Their whole purpose was to preseve
-  // SSA form.
-  Changed |= EliminateRegSequences(MF);
-
   return Changed;
 }
 
@@ -216,7 +210,8 @@ void llvm::PHIElimination::LowerAtomicPHINode(
     } else {
       entry = IncomingReg = MF.getRegInfo().createVirtualRegister(RC);
     }
-    TII->copyRegToReg(MBB, AfterPHIsIt, DestReg, IncomingReg, RC, RC);
+    TII->copyRegToReg(MBB, AfterPHIsIt, DestReg, IncomingReg, RC, RC,
+                      MPhi->getDebugLoc());
   }
 
   // Update live variable information if there is any.
@@ -298,7 +293,8 @@ void llvm::PHIElimination::LowerAtomicPHINode(
 
     // Insert the copy.
     if (!reusedIncoming && IncomingReg)
-      TII->copyRegToReg(opBlock, InsertPos, IncomingReg, SrcReg, RC, RC);
+      TII->copyRegToReg(opBlock, InsertPos, IncomingReg, SrcReg, RC, RC,
+                        MPhi->getDebugLoc());
 
     // Now update live variable information if we have it.  Otherwise we're done
     if (!LV) continue;
@@ -449,58 +445,3 @@ MachineBasicBlock *PHIElimination::SplitCriticalEdge(MachineBasicBlock *A,
 
   return NMBB;
 }
-
-static void UpdateRegSequenceSrcs(unsigned SrcReg,
-                                  unsigned DstReg, unsigned SrcIdx,
-                                  MachineRegisterInfo *MRI) {
-  for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(SrcReg),
-         UE = MRI->reg_end(); RI != UE; ) {
-    MachineOperand &MO = RI.getOperand();
-    ++RI;
-    MO.setReg(DstReg);
-    MO.setSubReg(SrcIdx);
-  }
-}
-
-/// EliminateRegSequences - Eliminate REG_SEQUENCE instructions as second part
-/// of de-ssa process. This replaces sources of REG_SEQUENCE as sub-register
-/// references of the register defined by REG_SEQUENCE. e.g.
-///
-/// %reg1029<def>, %reg1030<def> = VLD1q16 %reg1024<kill>, ...
-/// %reg1031<def> = REG_SEQUENCE %reg1029<kill>, 5, %reg1030<kill>, 6
-/// =>
-/// %reg1031:5<def>, %reg1031:6<def> = VLD1q16 %reg1024<kill>, ...
-bool PHIElimination::EliminateRegSequences(MachineFunction &MF) {
-  bool Changed = false;
-
-  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
-    for (MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end();
-         BBI != BBE; ) {
-      MachineInstr &MI = *BBI;
-      ++BBI;
-      if (MI.getOpcode() != TargetOpcode::REG_SEQUENCE)
-        continue;
-      unsigned DstReg = MI.getOperand(0).getReg();
-      if (MI.getOperand(0).getSubReg() ||
-          TargetRegisterInfo::isPhysicalRegister(DstReg) ||
-          !(MI.getNumOperands() & 1)) {
-        DEBUG(dbgs() << "Illegal REG_SEQUENCE instruction:" << MI);
-        llvm_unreachable(0);
-      }
-      for (unsigned i = 1, e = MI.getNumOperands(); i < e; i += 2) {
-        unsigned SrcReg = MI.getOperand(i).getReg();
-        if (MI.getOperand(i).getSubReg() ||
-            TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
-          DEBUG(dbgs() << "Illegal REG_SEQUENCE instruction:" << MI);
-          llvm_unreachable(0);
-        }
-        unsigned SrcIdx = MI.getOperand(i+1).getImm();
-        UpdateRegSequenceSrcs(SrcReg, DstReg, SrcIdx, MRI);
-      }
-
-      MI.eraseFromParent();
-      Changed = true;
-    }
-
-  return Changed;
-}
diff --git a/lib/CodeGen/PHIElimination.h b/lib/CodeGen/PHIElimination.h
index 3292aa2..7dedf03 100644
--- a/lib/CodeGen/PHIElimination.h
+++ b/lib/CodeGen/PHIElimination.h
@@ -94,8 +94,6 @@ namespace llvm {
       return I;
     }
 
-    bool EliminateRegSequences(MachineFunction &MF);
-
     typedef std::pair<unsigned, unsigned> BBVRegPair;
     typedef DenseMap<BBVRegPair, unsigned> VRegPHIUse;
 
diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp
index d3e1295..9714ea6 100644
--- a/lib/CodeGen/PostRASchedulerList.cpp
+++ b/lib/CodeGen/PostRASchedulerList.cpp
@@ -114,7 +114,7 @@ namespace {
     /// AvailableQueue - The priority queue to use for the available SUnits.
     ///
     LatencyPriorityQueue AvailableQueue;
-  
+
     /// PendingQueue - This contains all of the instructions whose operands have
     /// been issued, but their results are not ready yet (due to the latency of
     /// the operation).  Once the operands becomes available, the instruction is
@@ -158,7 +158,7 @@ namespace {
     /// Schedule - Schedule the instruction range using list scheduling.
     ///
     void Schedule();
-    
+
     /// Observe - Update liveness information to account for the current
     /// instruction, which will not be scheduled.
     ///
@@ -179,7 +179,7 @@ namespace {
     void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle);
     void ListScheduleTopDown();
     void StartBlockForKills(MachineBasicBlock *BB);
-    
+
     // ToggleKillFlag - Toggle a register operand kill flag. Other
     // adjustments may be made to the instruction if necessary. Return
     // true if the operand has been deleted, false if not.
@@ -197,13 +197,13 @@ static bool isSchedulingBoundary(const MachineInstr *MI,
   if (MI->getDesc().isTerminator() || MI->isLabel())
     return true;
 
-  // Don't attempt to schedule around any instruction that modifies
+  // Don't attempt to schedule around any instruction that defines
   // a stack-oriented pointer, as it's unlikely to be profitable. This
   // saves compile time, because it doesn't require every single
   // stack slot reference to depend on the instruction that does the
   // modification.
   const TargetLowering &TLI = *MF.getTarget().getTargetLowering();
-  if (MI->modifiesRegister(TLI.getStackPointerRegisterToSaveRestore()))
+  if (MI->definesRegister(TLI.getStackPointerRegisterToSaveRestore()))
     return true;
 
   return false;
@@ -227,9 +227,10 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
 
   // Check for antidep breaking override...
   if (EnableAntiDepBreaking.getPosition() > 0) {
-    AntiDepMode = (EnableAntiDepBreaking == "all") ? TargetSubtarget::ANTIDEP_ALL :
-      (EnableAntiDepBreaking == "critical") ? TargetSubtarget::ANTIDEP_CRITICAL :
-      TargetSubtarget::ANTIDEP_NONE;
+    AntiDepMode = (EnableAntiDepBreaking == "all") ?
+      TargetSubtarget::ANTIDEP_ALL :
+        (EnableAntiDepBreaking == "critical")
+           ? TargetSubtarget::ANTIDEP_CRITICAL : TargetSubtarget::ANTIDEP_NONE;
   }
 
   DEBUG(dbgs() << "PostRAScheduler\n");
@@ -240,10 +241,10 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
   ScheduleHazardRecognizer *HR = EnablePostRAHazardAvoidance ?
     (ScheduleHazardRecognizer *)new ExactHazardRecognizer(InstrItins) :
     (ScheduleHazardRecognizer *)new SimpleHazardRecognizer();
-  AntiDepBreaker *ADB = 
+  AntiDepBreaker *ADB =
     ((AntiDepMode == TargetSubtarget::ANTIDEP_ALL) ?
      (AntiDepBreaker *)new AggressiveAntiDepBreaker(Fn, CriticalPathRCs) :
-     ((AntiDepMode == TargetSubtarget::ANTIDEP_CRITICAL) ? 
+     ((AntiDepMode == TargetSubtarget::ANTIDEP_CRITICAL) ?
       (AntiDepBreaker *)new CriticalAntiDepBreaker(Fn) : NULL));
 
   SchedulePostRATDList Scheduler(Fn, MLI, MDT, HR, ADB, AA);
@@ -265,17 +266,6 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
     // Initialize register live-range state for scheduling in this block.
     Scheduler.StartBlock(MBB);
 
-    // FIXME: Temporary workaround for <rdar://problem/7759363>: The post-RA
-    // scheduler has some sort of problem with DebugValue instructions that
-    // causes an assertion in LeaksContext.h to fail occasionally.  Just
-    // remove all those instructions for now.
-    for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
-         I != E; ) {
-      MachineInstr *MI = &*I++;
-      if (MI->isDebugValue())
-        MI->eraseFromParent();
-    }
-
     // Schedule each sequence of instructions not interrupted by a label
     // or anything else that effectively needs to shut down scheduling.
     MachineBasicBlock::iterator Current = MBB->end();
@@ -310,7 +300,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
 
   return true;
 }
-  
+
 /// StartBlock - Initialize register live-range state for scheduling in
 /// this block.
 ///
@@ -331,10 +321,10 @@ void SchedulePostRATDList::Schedule() {
   BuildSchedGraph(AA);
 
   if (AntiDepBreak != NULL) {
-    unsigned Broken = 
+    unsigned Broken =
       AntiDepBreak->BreakAntiDependencies(SUnits, Begin, InsertPos,
                                           InsertPosIndex);
-    
+
     if (Broken != 0) {
       // We made changes. Update the dependency graph.
       // Theoretically we could update the graph in place:
@@ -347,7 +337,7 @@ void SchedulePostRATDList::Schedule() {
       EntrySU = SUnit();
       ExitSU = SUnit();
       BuildSchedGraph(AA);
-      
+
       NumFixedAnti += Broken;
     }
   }
@@ -425,7 +415,7 @@ bool SchedulePostRATDList::ToggleKillFlag(MachineInstr *MI,
     MO.setIsKill(true);
     return false;
   }
-  
+
   // If MO itself is live, clear the kill flag...
   if (KillIndices[MO.getReg()] != ~0u) {
     MO.setIsKill(false);
@@ -464,7 +454,7 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) {
   BitVector ReservedRegs = TRI->getReservedRegs(MF);
 
   StartBlockForKills(MBB);
-  
+
   // Examine block from end to start...
   unsigned Count = MBB->size();
   for (MachineBasicBlock::iterator I = MBB->end(), E = MBB->begin();
@@ -484,9 +474,9 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) {
       if (!MO.isDef()) continue;
       // Ignore two-addr defs.
       if (MI->isRegTiedToUseOperand(i)) continue;
-      
+
       KillIndices[Reg] = ~0u;
-      
+
       // Repeat for all subregs.
       for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
            *Subreg; ++Subreg) {
@@ -521,17 +511,17 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) {
         if (kill)
           kill = (KillIndices[Reg] == ~0u);
       }
-      
+
       if (MO.isKill() != kill) {
         DEBUG(dbgs() << "Fixing " << MO << " in ");
         // Warning: ToggleKillFlag may invalidate MO.
         ToggleKillFlag(MI, MO);
         DEBUG(MI->dump());
       }
-      
+
       killedRegs.insert(Reg);
     }
-    
+
     // Mark any used register (that is not using undef) and subregs as
     // now live...
     for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
@@ -541,7 +531,7 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) {
       if ((Reg == 0) || ReservedRegs.test(Reg)) continue;
 
       KillIndices[Reg] = Count;
-      
+
       for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
            *Subreg; ++Subreg) {
         KillIndices[*Subreg] = Count;
@@ -573,7 +563,7 @@ void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge) {
   // available.  This is the max of the start time of all predecessors plus
   // their latencies.
   SuccSU->setDepthToAtLeast(SU->getDepth() + SuccEdge->getLatency());
-  
+
   // If all the node's predecessors are scheduled, this node is ready
   // to be scheduled. Ignore the special ExitSU node.
   if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
@@ -594,9 +584,9 @@ void SchedulePostRATDList::ReleaseSuccessors(SUnit *SU) {
 void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
   DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
   DEBUG(SU->dump(this));
-  
+
   Sequence.push_back(SU);
-  assert(CurCycle >= SU->getDepth() && 
+  assert(CurCycle >= SU->getDepth() &&
          "Node scheduled above its depth!");
   SU->setDepthToAtLeast(CurCycle);
 
@@ -609,7 +599,7 @@ void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
 /// schedulers.
 void SchedulePostRATDList::ListScheduleTopDown() {
   unsigned CurCycle = 0;
-  
+
   // We're scheduling top-down but we're visiting the regions in
   // bottom-up order, so we don't know the hazards at the start of a
   // region. So assume no hazards (this should usually be ok as most
diff --git a/lib/CodeGen/PreAllocSplitting.cpp b/lib/CodeGen/PreAllocSplitting.cpp
index 2d49beb..96e7327 100644
--- a/lib/CodeGen/PreAllocSplitting.cpp
+++ b/lib/CodeGen/PreAllocSplitting.cpp
@@ -882,7 +882,7 @@ MachineInstr* PreAllocSplitting::FoldSpill(unsigned vreg,
          !RefsInMBB.count(FoldPt))
     --FoldPt;
   
-  int OpIdx = FoldPt->findRegisterDefOperandIdx(vreg, false);
+  int OpIdx = FoldPt->findRegisterDefOperandIdx(vreg);
   if (OpIdx == -1)
     return 0;
   
@@ -1061,7 +1061,8 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) {
       // Add spill.
     
       SS = CreateSpillStackSlot(CurrLI->reg, RC);
-      TII->storeRegToStackSlot(*BarrierMBB, SpillPt, CurrLI->reg, true, SS, RC);
+      TII->storeRegToStackSlot(*BarrierMBB, SpillPt, CurrLI->reg, true, SS, RC,
+                               TRI);
       SpillMI = prior(SpillPt);
       SpillIndex = LIs->InsertMachineInstrInMaps(SpillMI);
     }
@@ -1097,7 +1098,8 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) {
       }
       // Add spill. 
       SS = CreateSpillStackSlot(CurrLI->reg, RC);
-      TII->storeRegToStackSlot(*DefMBB, SpillPt, CurrLI->reg, false, SS, RC);
+      TII->storeRegToStackSlot(*DefMBB, SpillPt, CurrLI->reg, false, SS, RC,
+                               TRI);
       SpillMI = prior(SpillPt);
       SpillIndex = LIs->InsertMachineInstrInMaps(SpillMI);
     }
@@ -1116,7 +1118,7 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) {
     RestoreIndex = LIs->getInstructionIndex(RestorePt);
     FoldedRestore = true;
   } else {
-    TII->loadRegFromStackSlot(*BarrierMBB, RestorePt, CurrLI->reg, SS, RC);
+    TII->loadRegFromStackSlot(*BarrierMBB, RestorePt, CurrLI->reg, SS, RC, TRI);
     MachineInstr *LoadMI = prior(RestorePt);
     RestoreIndex = LIs->InsertMachineInstrInMaps(LoadMI);
   }
@@ -1152,7 +1154,7 @@ PreAllocSplitting::SplitRegLiveIntervals(const TargetRegisterClass **RCs,
     // codegen is not modelling. Ignore these barriers for now.
     if (!TII->isSafeToMoveRegClassDefs(*RC))
       continue;
-    std::vector<unsigned> &VRs = MRI->getRegClassVirtRegs(*RC);
+    const std::vector<unsigned> &VRs = MRI->getRegClassVirtRegs(*RC);
     for (unsigned i = 0, e = VRs.size(); i != e; ++i) {
       unsigned Reg = VRs[i];
       if (!LIs->hasInterval(Reg))
diff --git a/lib/CodeGen/ProcessImplicitDefs.cpp b/lib/CodeGen/ProcessImplicitDefs.cpp
index d7179b3..62f525f 100644
--- a/lib/CodeGen/ProcessImplicitDefs.cpp
+++ b/lib/CodeGen/ProcessImplicitDefs.cpp
@@ -46,7 +46,7 @@ bool ProcessImplicitDefs::CanTurnIntoImplicitDef(MachineInstr *MI,
                                                  const TargetInstrInfo *tii_) {
   unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
   if (tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubReg, DstSubReg) &&
-      Reg == SrcReg)
+      Reg == SrcReg && SrcSubReg == 0 && DstSubReg == 0)
     return true;
 
   if (OpIdx == 2 && MI->isSubregToReg())
@@ -89,6 +89,8 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) {
       MachineInstr *MI = &*I;
       ++I;
       if (MI->isImplicitDef()) {
+        if (MI->getOperand(0).getSubReg())
+          continue;
         unsigned Reg = MI->getOperand(0).getReg();
         ImpDefRegs.insert(Reg);
         if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
@@ -218,7 +220,7 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) {
         // Turn a copy use into an implicit_def.
         unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
         if (tii_->isMoveInstr(*RMI, SrcReg, DstReg, SrcSubReg, DstSubReg) &&
-            Reg == SrcReg) {
+            Reg == SrcReg && SrcSubReg == 0 && DstSubReg == 0) {
           RMI->setDesc(tii_->get(TargetOpcode::IMPLICIT_DEF));
 
           bool isKill = false;
diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp
index a454b62..e778024 100644
--- a/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/lib/CodeGen/PrologEpilogInserter.cpp
@@ -58,8 +58,9 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {
   FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(Fn);
   FrameConstantRegMap.clear();
 
-  // Calculate the MaxCallFrameSize and HasCalls variables for the function's
-  // frame information. Also eliminates call frame pseudo instructions.
+  // Calculate the MaxCallFrameSize and AdjustsStack variables for the
+  // function's frame information. Also eliminates call frame pseudo
+  // instructions.
   calculateCallsInformation(Fn);
 
   // Allow the target machine to make some adjustments to the function
@@ -91,8 +92,8 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {
 
   // Add prolog and epilog code to the function.  This function is required
   // to align the stack frame as necessary for any stack variables or
-  // called functions.  Because of this, calculateCalleeSavedRegisters
-  // must be called before this function in order to set the HasCalls
+  // called functions.  Because of this, calculateCalleeSavedRegisters()
+  // must be called before this function in order to set the AdjustsStack
   // and MaxCallFrameSize variables.
   if (!F->hasFnAttr(Attribute::Naked))
     insertPrologEpilogCode(Fn);
@@ -126,7 +127,7 @@ void PEI::getAnalysisUsage(AnalysisUsage &AU) const {
 }
 #endif
 
-/// calculateCallsInformation - Calculate the MaxCallFrameSize and HasCalls
+/// calculateCallsInformation - Calculate the MaxCallFrameSize and AdjustsStack
 /// variables for the function's frame information and eliminate call frame
 /// pseudo instructions.
 void PEI::calculateCallsInformation(MachineFunction &Fn) {
@@ -134,7 +135,7 @@ void PEI::calculateCallsInformation(MachineFunction &Fn) {
   MachineFrameInfo *MFI = Fn.getFrameInfo();
 
   unsigned MaxCallFrameSize = 0;
-  bool HasCalls = MFI->hasCalls();
+  bool AdjustsStack = MFI->adjustsStack();
 
   // Get the function call frame set-up and tear-down instruction opcode
   int FrameSetupOpcode   = RegInfo->getCallFrameSetupOpcode();
@@ -154,15 +155,15 @@ void PEI::calculateCallsInformation(MachineFunction &Fn) {
                " instructions should have a single immediate argument!");
         unsigned Size = I->getOperand(0).getImm();
         if (Size > MaxCallFrameSize) MaxCallFrameSize = Size;
-        HasCalls = true;
+        AdjustsStack = true;
         FrameSDOps.push_back(I);
       } else if (I->isInlineAsm()) {
         // An InlineAsm might be a call; assume it is to get the stack frame
         // aligned correctly for calls.
-        HasCalls = true;
+        AdjustsStack = true;
       }
 
-  MFI->setHasCalls(HasCalls);
+  MFI->setAdjustsStack(AdjustsStack);
   MFI->setMaxCallFrameSize(MaxCallFrameSize);
 
   for (std::vector<MachineBasicBlock::iterator>::iterator
@@ -289,12 +290,13 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) {
     return;
 
   const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo();
+  const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
   MachineBasicBlock::iterator I;
 
   if (! ShrinkWrapThisFunction) {
     // Spill using target interface.
     I = EntryBlock->begin();
-    if (!TII.spillCalleeSavedRegisters(*EntryBlock, I, CSI)) {
+    if (!TII.spillCalleeSavedRegisters(*EntryBlock, I, CSI, TRI)) {
       for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
         // Add the callee-saved register as live-in.
         // It's killed at the spill.
@@ -302,7 +304,7 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) {
 
         // Insert the spill to the stack frame.
         TII.storeRegToStackSlot(*EntryBlock, I, CSI[i].getReg(), true,
-                                CSI[i].getFrameIdx(), CSI[i].getRegClass());
+                                CSI[i].getFrameIdx(), CSI[i].getRegClass(),TRI);
       }
     }
 
@@ -324,11 +326,11 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) {
 
       // Restore all registers immediately before the return and any
       // terminators that preceed it.
-      if (!TII.restoreCalleeSavedRegisters(*MBB, I, CSI)) {
+      if (!TII.restoreCalleeSavedRegisters(*MBB, I, CSI, TRI)) {
         for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
           TII.loadRegFromStackSlot(*MBB, I, CSI[i].getReg(),
                                    CSI[i].getFrameIdx(),
-                                   CSI[i].getRegClass());
+                                   CSI[i].getRegClass(), TRI);
           assert(I != MBB->begin() &&
                  "loadRegFromStackSlot didn't insert any code!");
           // Insert in reverse order.  loadRegFromStackSlot can insert
@@ -375,7 +377,7 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) {
       TII.storeRegToStackSlot(*MBB, I, blockCSI[i].getReg(),
                               true,
                               blockCSI[i].getFrameIdx(),
-                              blockCSI[i].getRegClass());
+                              blockCSI[i].getRegClass(), TRI);
     }
   }
 
@@ -423,7 +425,7 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) {
     for (unsigned i = 0, e = blockCSI.size(); i != e; ++i) {
       TII.loadRegFromStackSlot(*MBB, I, blockCSI[i].getReg(),
                                blockCSI[i].getFrameIdx(),
-                               blockCSI[i].getRegClass());
+                               blockCSI[i].getRegClass(), TRI);
       assert(I != MBB->begin() &&
              "loadRegFromStackSlot didn't insert any code!");
       // Insert in reverse order.  loadRegFromStackSlot can insert
@@ -576,7 +578,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
     // If we have reserved argument space for call sites in the function
     // immediately on entry to the current function, count it as part of the
     // overall stack size.
-    if (MFI->hasCalls() && RegInfo->hasReservedCallFrame(Fn))
+    if (MFI->adjustsStack() && RegInfo->hasReservedCallFrame(Fn))
       Offset += MFI->getMaxCallFrameSize();
 
     // Round up the size to a multiple of the alignment.  If the function has
@@ -585,13 +587,14 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
     // otherwise, for leaf functions, align to the TransientStackAlignment
     // value.
     unsigned StackAlign;
-    if (MFI->hasCalls() || MFI->hasVarSizedObjects() ||
+    if (MFI->adjustsStack() || MFI->hasVarSizedObjects() ||
         (RegInfo->needsStackRealignment(Fn) && MFI->getObjectIndexEnd() != 0))
       StackAlign = TFI.getStackAlignment();
     else
       StackAlign = TFI.getTransientStackAlignment();
-    // If the frame pointer is eliminated, all frame offsets will be relative
-    // to SP not FP; align to MaxAlign so this works.
+
+    // If the frame pointer is eliminated, all frame offsets will be relative to
+    // SP not FP. Align to MaxAlign so this works.
     StackAlign = std::max(StackAlign, MaxAlign);
     unsigned AlignMask = StackAlign - 1;
     Offset = (Offset + AlignMask) & ~uint64_t(AlignMask);
@@ -601,7 +604,6 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
   MFI->setStackSize(Offset - LocalAreaOffset);
 }
 
-
 /// insertPrologEpilogCode - Scan the function for modified callee saved
 /// registers, insert spill code for these callee saved registers, then add
 /// prolog and epilog code to the function.
@@ -620,7 +622,6 @@ void PEI::insertPrologEpilogCode(MachineFunction &Fn) {
   }
 }
 
-
 /// replaceFrameIndices - Replace all MO_FrameIndex operands with physical
 /// register references and actual offsets.
 ///
diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp
index 2caf1df..b3b5760 100644
--- a/lib/CodeGen/RegAllocFast.cpp
+++ b/lib/CodeGen/RegAllocFast.cpp
@@ -18,7 +18,6 @@
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/LiveVariables.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/RegAllocRegistry.h"
 #include "llvm/Target/TargetInstrInfo.h"
@@ -38,6 +37,7 @@ using namespace llvm;
 
 STATISTIC(NumStores, "Number of stores added");
 STATISTIC(NumLoads , "Number of loads added");
+STATISTIC(NumCopies, "Number of copies coalesced");
 
 static RegisterRegAlloc
   fastRegAlloc("fast", "fast register allocator", createFastRegisterAllocator);
@@ -46,77 +46,80 @@ namespace {
   class RAFast : public MachineFunctionPass {
   public:
     static char ID;
-    RAFast() : MachineFunctionPass(&ID), StackSlotForVirtReg(-1) {}
+    RAFast() : MachineFunctionPass(&ID), StackSlotForVirtReg(-1),
+               isBulkSpilling(false) {}
   private:
     const TargetMachine *TM;
     MachineFunction *MF;
+    MachineRegisterInfo *MRI;
     const TargetRegisterInfo *TRI;
     const TargetInstrInfo *TII;
 
+    // Basic block currently being allocated.
+    MachineBasicBlock *MBB;
+
     // StackSlotForVirtReg - Maps virtual regs to the frame index where these
     // values are spilled.
     IndexedMap<int, VirtReg2IndexFunctor> StackSlotForVirtReg;
 
-    // Virt2PhysRegMap - This map contains entries for each virtual register
+    // Everything we know about a live virtual register.
+    struct LiveReg {
+      MachineInstr *LastUse;    // Last instr to use reg.
+      unsigned PhysReg;         // Currently held here.
+      unsigned short LastOpNum; // OpNum on LastUse.
+      bool Dirty;               // Register needs spill.
+
+      LiveReg(unsigned p=0) : LastUse(0), PhysReg(p), LastOpNum(0),
+                              Dirty(false) {}
+    };
+
+    typedef DenseMap<unsigned, LiveReg> LiveRegMap;
+    typedef LiveRegMap::value_type LiveRegEntry;
+
+    // LiveVirtRegs - This map contains entries for each virtual register
     // that is currently available in a physical register.
-    IndexedMap<unsigned, VirtReg2IndexFunctor> Virt2PhysRegMap;
+    LiveRegMap LiveVirtRegs;
 
-    unsigned &getVirt2PhysRegMapSlot(unsigned VirtReg) {
-      return Virt2PhysRegMap[VirtReg];
-    }
+    // RegState - Track the state of a physical register.
+    enum RegState {
+      // A disabled register is not available for allocation, but an alias may
+      // be in use. A register can only be moved out of the disabled state if
+      // all aliases are disabled.
+      regDisabled,
 
-    // PhysRegsUsed - This array is effectively a map, containing entries for
-    // each physical register that currently has a value (ie, it is in
-    // Virt2PhysRegMap).  The value mapped to is the virtual register
-    // corresponding to the physical register (the inverse of the
-    // Virt2PhysRegMap), or 0.  The value is set to 0 if this register is pinned
-    // because it is used by a future instruction, and to -2 if it is not
-    // allocatable.  If the entry for a physical register is -1, then the
-    // physical register is "not in the map".
-    //
-    std::vector<int> PhysRegsUsed;
+      // A free register is not currently in use and can be allocated
+      // immediately without checking aliases.
+      regFree,
+
+      // A reserved register has been assigned expolicitly (e.g., setting up a
+      // call parameter), and it remains reserved until it is used.
+      regReserved
+
+      // A register state may also be a virtual register number, indication that
+      // the physical register is currently allocated to a virtual register. In
+      // that case, LiveVirtRegs contains the inverse mapping.
+    };
+
+    // PhysRegState - One of the RegState enums, or a virtreg.
+    std::vector<unsigned> PhysRegState;
 
     // UsedInInstr - BitVector of physregs that are used in the current
     // instruction, and so cannot be allocated.
     BitVector UsedInInstr;
 
-    // Virt2LastUseMap - This maps each virtual register to its last use
-    // (MachineInstr*, operand index pair).
-    IndexedMap<std::pair<MachineInstr*, unsigned>, VirtReg2IndexFunctor>
-    Virt2LastUseMap;
-
-    std::pair<MachineInstr*,unsigned>& getVirtRegLastUse(unsigned Reg) {
-      assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Illegal VirtReg!");
-      return Virt2LastUseMap[Reg];
-    }
-
-    // VirtRegModified - This bitset contains information about which virtual
-    // registers need to be spilled back to memory when their registers are
-    // scavenged.  If a virtual register has simply been rematerialized, there
-    // is no reason to spill it to memory when we need the register back.
-    //
-    BitVector VirtRegModified;
-
-    // UsedInMultipleBlocks - Tracks whether a particular register is used in
-    // more than one block.
-    BitVector UsedInMultipleBlocks;
-
-    void markVirtRegModified(unsigned Reg, bool Val = true) {
-      assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Illegal VirtReg!");
-      Reg -= TargetRegisterInfo::FirstVirtualRegister;
-      if (Val)
-        VirtRegModified.set(Reg);
-      else
-        VirtRegModified.reset(Reg);
-    }
+    // Allocatable - vector of allocatable physical registers.
+    BitVector Allocatable;
 
-    bool isVirtRegModified(unsigned Reg) const {
-      assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Illegal VirtReg!");
-      assert(Reg - TargetRegisterInfo::FirstVirtualRegister <
-             VirtRegModified.size() && "Illegal virtual register!");
-      return VirtRegModified[Reg - TargetRegisterInfo::FirstVirtualRegister];
-    }
+    // isBulkSpilling - This flag is set when LiveRegMap will be cleared
+    // completely after spilling all live registers. LiveRegMap entries should
+    // not be erased.
+    bool isBulkSpilling;
 
+    enum {
+      spillClean = 1,
+      spillDirty = 100,
+      spillImpossible = ~0u
+    };
   public:
     virtual const char *getPassName() const {
       return "Fast Register Allocator";
@@ -124,104 +127,34 @@ namespace {
 
     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
       AU.setPreservesCFG();
-      AU.addRequired<LiveVariables>();
       AU.addRequiredID(PHIEliminationID);
       AU.addRequiredID(TwoAddressInstructionPassID);
       MachineFunctionPass::getAnalysisUsage(AU);
     }
 
   private:
-    /// runOnMachineFunction - Register allocate the whole function
     bool runOnMachineFunction(MachineFunction &Fn);
-
-    /// AllocateBasicBlock - Register allocate the specified basic block.
-    void AllocateBasicBlock(MachineBasicBlock &MBB);
-
-
-    /// areRegsEqual - This method returns true if the specified registers are
-    /// related to each other.  To do this, it checks to see if they are equal
-    /// or if the first register is in the alias set of the second register.
-    ///
-    bool areRegsEqual(unsigned R1, unsigned R2) const {
-      if (R1 == R2) return true;
-      for (const unsigned *AliasSet = TRI->getAliasSet(R2);
-           *AliasSet; ++AliasSet) {
-        if (*AliasSet == R1) return true;
-      }
-      return false;
-    }
-
-    /// getStackSpaceFor - This returns the frame index of the specified virtual
-    /// register on the stack, allocating space if necessary.
+    void AllocateBasicBlock();
     int getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC);
-
-    /// removePhysReg - This method marks the specified physical register as no
-    /// longer being in use.
-    ///
-    void removePhysReg(unsigned PhysReg);
-
-    /// spillVirtReg - This method spills the value specified by PhysReg into
-    /// the virtual register slot specified by VirtReg.  It then updates the RA
-    /// data structures to indicate the fact that PhysReg is now available.
-    ///
-    void spillVirtReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
-                      unsigned VirtReg, unsigned PhysReg);
-
-    /// spillPhysReg - This method spills the specified physical register into
-    /// the virtual register slot associated with it.  If OnlyVirtRegs is set to
-    /// true, then the request is ignored if the physical register does not
-    /// contain a virtual register.
-    ///
-    void spillPhysReg(MachineBasicBlock &MBB, MachineInstr *I,
-                      unsigned PhysReg, bool OnlyVirtRegs = false);
-
-    /// assignVirtToPhysReg - This method updates local state so that we know
-    /// that PhysReg is the proper container for VirtReg now.  The physical
-    /// register must not be used for anything else when this is called.
-    ///
-    void assignVirtToPhysReg(unsigned VirtReg, unsigned PhysReg);
-
-    /// isPhysRegAvailable - Return true if the specified physical register is
-    /// free and available for use.  This also includes checking to see if
-    /// aliased registers are all free...
-    ///
-    bool isPhysRegAvailable(unsigned PhysReg) const;
-
-    /// isPhysRegSpillable - Can PhysReg be freed by spilling?
-    bool isPhysRegSpillable(unsigned PhysReg) const;
-
-    /// getFreeReg - Look to see if there is a free register available in the
-    /// specified register class.  If not, return 0.
-    ///
-    unsigned getFreeReg(const TargetRegisterClass *RC);
-
-    /// getReg - Find a physical register to hold the specified virtual
-    /// register.  If all compatible physical registers are used, this method
-    /// spills the last used virtual register to the stack, and uses that
-    /// register. If NoFree is true, that means the caller knows there isn't
-    /// a free register, do not call getFreeReg().
-    unsigned getReg(MachineBasicBlock &MBB, MachineInstr *MI,
-                    unsigned VirtReg, bool NoFree = false);
-
-    /// reloadVirtReg - This method transforms the specified virtual
-    /// register use to refer to a physical register.  This method may do this
-    /// in one of several ways: if the register is available in a physical
-    /// register already, it uses that physical register.  If the value is not
-    /// in a physical register, and if there are physical registers available,
-    /// it loads it into a register: PhysReg if that is an available physical
-    /// register, otherwise any physical register of the right class.
-    /// If register pressure is high, and it is possible, it tries to fold the
-    /// load of the virtual register into the instruction itself.  It avoids
-    /// doing this if register pressure is low to improve the chance that
-    /// subsequent instructions can use the reloaded value.  This method
-    /// returns the modified instruction.
-    ///
-    MachineInstr *reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI,
-                                unsigned OpNum, SmallSet<unsigned, 4> &RRegs,
-                                unsigned PhysReg);
-
-    void reloadPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I,
-                       unsigned PhysReg);
+    bool isLastUseOfLocalReg(MachineOperand&);
+
+    void addKillFlag(const LiveReg&);
+    void killVirtReg(LiveRegMap::iterator);
+    void killVirtReg(unsigned VirtReg);
+    void spillVirtReg(MachineBasicBlock::iterator MI, LiveRegMap::iterator);
+    void spillVirtReg(MachineBasicBlock::iterator MI, unsigned VirtReg);
+
+    void usePhysReg(MachineOperand&);
+    void definePhysReg(MachineInstr *MI, unsigned PhysReg, RegState NewState);
+    unsigned calcSpillCost(unsigned PhysReg) const;
+    void assignVirtToPhysReg(LiveRegEntry &LRE, unsigned PhysReg);
+    void allocVirtReg(MachineInstr *MI, LiveRegEntry &LRE, unsigned Hint);
+    LiveRegMap::iterator defineVirtReg(MachineInstr *MI, unsigned OpNum,
+                                       unsigned VirtReg, unsigned Hint);
+    LiveRegMap::iterator reloadVirtReg(MachineInstr *MI, unsigned OpNum,
+                                       unsigned VirtReg, unsigned Hint);
+    void spillAll(MachineInstr *MI);
+    bool setPhysReg(MachineInstr *MI, unsigned OpNum, unsigned PhysReg);
   };
   char RAFast::ID = 0;
 }
@@ -243,687 +176,668 @@ int RAFast::getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC) {
   return FrameIdx;
 }
 
-
-/// removePhysReg - This method marks the specified physical register as no
-/// longer being in use.
-///
-void RAFast::removePhysReg(unsigned PhysReg) {
-  PhysRegsUsed[PhysReg] = -1;      // PhyReg no longer used
-}
-
-
-/// spillVirtReg - This method spills the value specified by PhysReg into the
-/// virtual register slot specified by VirtReg.  It then updates the RA data
-/// structures to indicate the fact that PhysReg is now available.
+/// isLastUseOfLocalReg - Return true if MO is the only remaining reference to
+/// its virtual register, and it is guaranteed to be a block-local register.
 ///
-void RAFast::spillVirtReg(MachineBasicBlock &MBB,
-                           MachineBasicBlock::iterator I,
-                           unsigned VirtReg, unsigned PhysReg) {
-  assert(VirtReg && "Spilling a physical register is illegal!"
-         " Must not have appropriate kill for the register or use exists beyond"
-         " the intended one.");
-  DEBUG(dbgs() << "  Spilling register " << TRI->getName(PhysReg)
-               << " containing %reg" << VirtReg);
-
-  if (!isVirtRegModified(VirtReg)) {
-    DEBUG(dbgs() << " which has not been modified, so no store necessary!");
-    std::pair<MachineInstr*, unsigned> &LastUse = getVirtRegLastUse(VirtReg);
-    if (LastUse.first)
-      LastUse.first->getOperand(LastUse.second).setIsKill();
-  } else {
-    // Otherwise, there is a virtual register corresponding to this physical
-    // register.  We only need to spill it into its stack slot if it has been
-    // modified.
-    const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg);
-    int FrameIndex = getStackSpaceFor(VirtReg, RC);
-    DEBUG(dbgs() << " to stack slot #" << FrameIndex);
-    // If the instruction reads the register that's spilled, (e.g. this can
-    // happen if it is a move to a physical register), then the spill
-    // instruction is not a kill.
-    bool isKill = !(I != MBB.end() && I->readsRegister(PhysReg));
-    TII->storeRegToStackSlot(MBB, I, PhysReg, isKill, FrameIndex, RC);
-    ++NumStores;   // Update statistics
-  }
+bool RAFast::isLastUseOfLocalReg(MachineOperand &MO) {
+  // Check for non-debug uses or defs following MO.
+  // This is the most likely way to fail - fast path it.
+  MachineOperand *Next = &MO;
+  while ((Next = Next->getNextOperandForReg()))
+    if (!Next->isDebug())
+      return false;
 
-  getVirt2PhysRegMapSlot(VirtReg) = 0;   // VirtReg no longer available
+  // If the register has ever been spilled or reloaded, we conservatively assume
+  // it is a global register used in multiple blocks.
+  if (StackSlotForVirtReg[MO.getReg()] != -1)
+    return false;
 
-  DEBUG(dbgs() << '\n');
-  removePhysReg(PhysReg);
+  // Check that the use/def chain has exactly one operand - MO.
+  return &MRI->reg_nodbg_begin(MO.getReg()).getOperand() == &MO;
 }
 
-
-/// spillPhysReg - This method spills the specified physical register into the
-/// virtual register slot associated with it.  If OnlyVirtRegs is set to true,
-/// then the request is ignored if the physical register does not contain a
-/// virtual register.
-///
-void RAFast::spillPhysReg(MachineBasicBlock &MBB, MachineInstr *I,
-                           unsigned PhysReg, bool OnlyVirtRegs) {
-  if (PhysRegsUsed[PhysReg] != -1) {            // Only spill it if it's used!
-    assert(PhysRegsUsed[PhysReg] != -2 && "Non allocable reg used!");
-    if (PhysRegsUsed[PhysReg] || !OnlyVirtRegs)
-      spillVirtReg(MBB, I, PhysRegsUsed[PhysReg], PhysReg);
-    return;
+/// addKillFlag - Set kill flags on last use of a virtual register.
+void RAFast::addKillFlag(const LiveReg &LR) {
+  if (!LR.LastUse) return;
+  MachineOperand &MO = LR.LastUse->getOperand(LR.LastOpNum);
+  if (MO.isUse() && !LR.LastUse->isRegTiedToDefOperand(LR.LastOpNum)) {
+    if (MO.getReg() == LR.PhysReg)
+      MO.setIsKill();
+    else
+      LR.LastUse->addRegisterKilled(LR.PhysReg, TRI, true);
   }
+}
 
-  // If the selected register aliases any other registers, we must make
-  // sure that one of the aliases isn't alive.
-  for (const unsigned *AliasSet = TRI->getAliasSet(PhysReg);
-       *AliasSet; ++AliasSet) {
-    if (PhysRegsUsed[*AliasSet] == -1 ||     // Spill aliased register.
-        PhysRegsUsed[*AliasSet] == -2)       // If allocatable.
-      continue;
-
-    if (PhysRegsUsed[*AliasSet])
-      spillVirtReg(MBB, I, PhysRegsUsed[*AliasSet], *AliasSet);
-  }
+/// killVirtReg - Mark virtreg as no longer available.
+void RAFast::killVirtReg(LiveRegMap::iterator LRI) {
+  addKillFlag(LRI->second);
+  const LiveReg &LR = LRI->second;
+  assert(PhysRegState[LR.PhysReg] == LRI->first && "Broken RegState mapping");
+  PhysRegState[LR.PhysReg] = regFree;
+  // Erase from LiveVirtRegs unless we're spilling in bulk.
+  if (!isBulkSpilling)
+    LiveVirtRegs.erase(LRI);
 }
 
+/// killVirtReg - Mark virtreg as no longer available.
+void RAFast::killVirtReg(unsigned VirtReg) {
+  assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
+         "killVirtReg needs a virtual register");
+  LiveRegMap::iterator LRI = LiveVirtRegs.find(VirtReg);
+  if (LRI != LiveVirtRegs.end())
+    killVirtReg(LRI);
+}
 
-/// assignVirtToPhysReg - This method updates local state so that we know
-/// that PhysReg is the proper container for VirtReg now.  The physical
-/// register must not be used for anything else when this is called.
-///
-void RAFast::assignVirtToPhysReg(unsigned VirtReg, unsigned PhysReg) {
-  assert(PhysRegsUsed[PhysReg] == -1 && "Phys reg already assigned!");
-  // Update information to note the fact that this register was just used, and
-  // it holds VirtReg.
-  PhysRegsUsed[PhysReg] = VirtReg;
-  getVirt2PhysRegMapSlot(VirtReg) = PhysReg;
-  UsedInInstr.set(PhysReg);
+/// spillVirtReg - This method spills the value specified by VirtReg into the
+/// corresponding stack slot if needed. If isKill is set, the register is also
+/// killed.
+void RAFast::spillVirtReg(MachineBasicBlock::iterator MI, unsigned VirtReg) {
+  assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
+         "Spilling a physical register is illegal!");
+  LiveRegMap::iterator LRI = LiveVirtRegs.find(VirtReg);
+  assert(LRI != LiveVirtRegs.end() && "Spilling unmapped virtual register");
+  spillVirtReg(MI, LRI);
 }
 
+/// spillVirtReg - Do the actual work of spilling.
+void RAFast::spillVirtReg(MachineBasicBlock::iterator MI,
+                          LiveRegMap::iterator LRI) {
+  LiveReg &LR = LRI->second;
+  assert(PhysRegState[LR.PhysReg] == LRI->first && "Broken RegState mapping");
+
+  if (LR.Dirty) {
+    // If this physreg is used by the instruction, we want to kill it on the
+    // instruction, not on the spill.
+    bool SpillKill = LR.LastUse != MI;
+    LR.Dirty = false;
+    DEBUG(dbgs() << "Spilling %reg" << LRI->first
+                 << " in " << TRI->getName(LR.PhysReg));
+    const TargetRegisterClass *RC = MRI->getRegClass(LRI->first);
+    int FI = getStackSpaceFor(LRI->first, RC);
+    DEBUG(dbgs() << " to stack slot #" << FI << "\n");
+    TII->storeRegToStackSlot(*MBB, MI, LR.PhysReg, SpillKill, FI, RC, TRI);
+    ++NumStores;   // Update statistics
 
-/// isPhysRegAvailable - Return true if the specified physical register is free
-/// and available for use.  This also includes checking to see if aliased
-/// registers are all free...
-///
-bool RAFast::isPhysRegAvailable(unsigned PhysReg) const {
-  if (PhysRegsUsed[PhysReg] != -1) return false;
-
-  // If the selected register aliases any other allocated registers, it is
-  // not free!
-  for (const unsigned *AliasSet = TRI->getAliasSet(PhysReg);
-       *AliasSet; ++AliasSet)
-    if (PhysRegsUsed[*AliasSet] >= 0) // Aliased register in use?
-      return false;                    // Can't use this reg then.
-  return true;
+    if (SpillKill)
+      LR.LastUse = 0; // Don't kill register again
+  }
+  killVirtReg(LRI);
 }
 
-/// isPhysRegSpillable - Return true if the specified physical register can be
-/// spilled for use in the current instruction.
-///
-bool RAFast::isPhysRegSpillable(unsigned PhysReg) const {
-  // Test that PhysReg and all aliases are either free or assigned to a VirtReg
-  // that is not used in the instruction.
-  if (PhysRegsUsed[PhysReg] != -1 &&
-      (PhysRegsUsed[PhysReg] <= 0 || UsedInInstr.test(PhysReg)))
-    return false;
-
-  for (const unsigned *AliasSet = TRI->getAliasSet(PhysReg);
-       *AliasSet; ++AliasSet)
-    if (PhysRegsUsed[*AliasSet] != -1 &&
-        (PhysRegsUsed[*AliasSet] <= 0 || UsedInInstr.test(*AliasSet)))
-      return false;
-  return true;
+/// spillAll - Spill all dirty virtregs without killing them.
+void RAFast::spillAll(MachineInstr *MI) {
+  if (LiveVirtRegs.empty()) return;
+  isBulkSpilling = true;
+  // The LiveRegMap is keyed by an unsigned (the virtreg number), so the order
+  // of spilling here is deterministic, if arbitrary.
+  for (LiveRegMap::iterator i = LiveVirtRegs.begin(), e = LiveVirtRegs.end();
+       i != e; ++i)
+    spillVirtReg(MI, i);
+  LiveVirtRegs.clear();
+  isBulkSpilling = false;
 }
 
+/// usePhysReg - Handle the direct use of a physical register.
+/// Check that the register is not used by a virtreg.
+/// Kill the physreg, marking it free.
+/// This may add implicit kills to MO->getParent() and invalidate MO.
+void RAFast::usePhysReg(MachineOperand &MO) {
+  unsigned PhysReg = MO.getReg();
+  assert(TargetRegisterInfo::isPhysicalRegister(PhysReg) &&
+         "Bad usePhysReg operand");
+
+  switch (PhysRegState[PhysReg]) {
+  case regDisabled:
+    break;
+  case regReserved:
+    PhysRegState[PhysReg] = regFree;
+    // Fall through
+  case regFree:
+    UsedInInstr.set(PhysReg);
+    MO.setIsKill();
+    return;
+  default:
+    // The physreg was allocated to a virtual register. That means to value we
+    // wanted has been clobbered.
+    llvm_unreachable("Instruction uses an allocated register");
+  }
 
-/// getFreeReg - Look to see if there is a free register available in the
-/// specified register class.  If not, return 0.
-///
-unsigned RAFast::getFreeReg(const TargetRegisterClass *RC) {
-  // Get iterators defining the range of registers that are valid to allocate in
-  // this class, which also specifies the preferred allocation order.
-  TargetRegisterClass::iterator RI = RC->allocation_order_begin(*MF);
-  TargetRegisterClass::iterator RE = RC->allocation_order_end(*MF);
-
-  for (; RI != RE; ++RI)
-    if (isPhysRegAvailable(*RI)) {       // Is reg unused?
-      assert(*RI != 0 && "Cannot use register!");
-      return *RI; // Found an unused register!
+  // Maybe a superregister is reserved?
+  for (const unsigned *AS = TRI->getAliasSet(PhysReg);
+       unsigned Alias = *AS; ++AS) {
+    switch (PhysRegState[Alias]) {
+    case regDisabled:
+      break;
+    case regReserved:
+      assert(TRI->isSuperRegister(PhysReg, Alias) &&
+             "Instruction is not using a subregister of a reserved register");
+      // Leave the superregister in the working set.
+      PhysRegState[Alias] = regFree;
+      UsedInInstr.set(Alias);
+      MO.getParent()->addRegisterKilled(Alias, TRI, true);
+      return;
+    case regFree:
+      if (TRI->isSuperRegister(PhysReg, Alias)) {
+        // Leave the superregister in the working set.
+        UsedInInstr.set(Alias);
+        MO.getParent()->addRegisterKilled(Alias, TRI, true);
+        return;
+      }
+      // Some other alias was in the working set - clear it.
+      PhysRegState[Alias] = regDisabled;
+      break;
+    default:
+      llvm_unreachable("Instruction uses an alias of an allocated register");
     }
-  return 0;
+  }
+
+  // All aliases are disabled, bring register into working set.
+  PhysRegState[PhysReg] = regFree;
+  UsedInInstr.set(PhysReg);
+  MO.setIsKill();
 }
 
+/// definePhysReg - Mark PhysReg as reserved or free after spilling any
+/// virtregs. This is very similar to defineVirtReg except the physreg is
+/// reserved instead of allocated.
+void RAFast::definePhysReg(MachineInstr *MI, unsigned PhysReg,
+                           RegState NewState) {
+  UsedInInstr.set(PhysReg);
+  switch (unsigned VirtReg = PhysRegState[PhysReg]) {
+  case regDisabled:
+    break;
+  default:
+    spillVirtReg(MI, VirtReg);
+    // Fall through.
+  case regFree:
+  case regReserved:
+    PhysRegState[PhysReg] = NewState;
+    return;
+  }
 
-/// getReg - Find a physical register to hold the specified virtual
-/// register.  If all compatible physical registers are used, this method spills
-/// the last used virtual register to the stack, and uses that register.
-///
-unsigned RAFast::getReg(MachineBasicBlock &MBB, MachineInstr *I,
-                         unsigned VirtReg, bool NoFree) {
-  const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg);
+  // This is a disabled register, disable all aliases.
+  PhysRegState[PhysReg] = NewState;
+  for (const unsigned *AS = TRI->getAliasSet(PhysReg);
+       unsigned Alias = *AS; ++AS) {
+    UsedInInstr.set(Alias);
+    switch (unsigned VirtReg = PhysRegState[Alias]) {
+    case regDisabled:
+      break;
+    default:
+      spillVirtReg(MI, VirtReg);
+      // Fall through.
+    case regFree:
+    case regReserved:
+      PhysRegState[Alias] = regDisabled;
+      if (TRI->isSuperRegister(PhysReg, Alias))
+        return;
+      break;
+    }
+  }
+}
 
-  // First check to see if we have a free register of the requested type...
-  unsigned PhysReg = NoFree ? 0 : getFreeReg(RC);
 
-  if (PhysReg != 0) {
-    // Assign the register.
-    assignVirtToPhysReg(VirtReg, PhysReg);
-    return PhysReg;
+// calcSpillCost - Return the cost of spilling clearing out PhysReg and
+// aliases so it is free for allocation.
+// Returns 0 when PhysReg is free or disabled with all aliases disabled - it
+// can be allocated directly.
+// Returns spillImpossible when PhysReg or an alias can't be spilled.
+unsigned RAFast::calcSpillCost(unsigned PhysReg) const {
+  if (UsedInInstr.test(PhysReg))
+    return spillImpossible;
+  switch (unsigned VirtReg = PhysRegState[PhysReg]) {
+  case regDisabled:
+    break;
+  case regFree:
+    return 0;
+  case regReserved:
+    return spillImpossible;
+  default:
+    return LiveVirtRegs.lookup(VirtReg).Dirty ? spillDirty : spillClean;
   }
 
-  // If we didn't find an unused register, scavenge one now! Don't be fancy,
-  // just grab the first possible register.
-  TargetRegisterClass::iterator RI = RC->allocation_order_begin(*MF);
-  TargetRegisterClass::iterator RE = RC->allocation_order_end(*MF);
-
-  for (; RI != RE; ++RI)
-    if (isPhysRegSpillable(*RI)) {
-      PhysReg = *RI;
+  // This is a disabled register, add up const of aliases.
+  unsigned Cost = 0;
+  for (const unsigned *AS = TRI->getAliasSet(PhysReg);
+       unsigned Alias = *AS; ++AS) {
+    if (UsedInInstr.test(Alias))
+      return spillImpossible;
+    switch (unsigned VirtReg = PhysRegState[Alias]) {
+    case regDisabled:
+      break;
+    case regFree:
+      ++Cost;
+      break;
+    case regReserved:
+      return spillImpossible;
+    default:
+      Cost += LiveVirtRegs.lookup(VirtReg).Dirty ? spillDirty : spillClean;
       break;
     }
-
-  assert(PhysReg && "Physical register not assigned!?!?");
-  spillPhysReg(MBB, I, PhysReg);
-  assignVirtToPhysReg(VirtReg, PhysReg);
-  return PhysReg;
+  }
+  return Cost;
 }
 
 
-/// reloadVirtReg - This method transforms the specified virtual
-/// register use to refer to a physical register.  This method may do this in
-/// one of several ways: if the register is available in a physical register
-/// already, it uses that physical register.  If the value is not in a physical
-/// register, and if there are physical registers available, it loads it into a
-/// register: PhysReg if that is an available physical register, otherwise any
-/// register.  If register pressure is high, and it is possible, it tries to
-/// fold the load of the virtual register into the instruction itself.  It
-/// avoids doing this if register pressure is low to improve the chance that
-/// subsequent instructions can use the reloaded value.  This method returns
-/// the modified instruction.
+/// assignVirtToPhysReg - This method updates local state so that we know
+/// that PhysReg is the proper container for VirtReg now.  The physical
+/// register must not be used for anything else when this is called.
 ///
-MachineInstr *RAFast::reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI,
-                                     unsigned OpNum,
-                                     SmallSet<unsigned, 4> &ReloadedRegs,
-                                     unsigned PhysReg) {
-  unsigned VirtReg = MI->getOperand(OpNum).getReg();
-
-  // If the virtual register is already available, just update the instruction
-  // and return.
-  if (unsigned PR = getVirt2PhysRegMapSlot(VirtReg)) {
-    MI->getOperand(OpNum).setReg(PR);  // Assign the input register
-    if (!MI->isDebugValue()) {
-      // Do not do these for DBG_VALUE as they can affect codegen.
-      UsedInInstr.set(PR);
-      getVirtRegLastUse(VirtReg) = std::make_pair(MI, OpNum);
+void RAFast::assignVirtToPhysReg(LiveRegEntry &LRE, unsigned PhysReg) {
+  DEBUG(dbgs() << "Assigning %reg" << LRE.first << " to "
+               << TRI->getName(PhysReg) << "\n");
+  PhysRegState[PhysReg] = LRE.first;
+  assert(!LRE.second.PhysReg && "Already assigned a physreg");
+  LRE.second.PhysReg = PhysReg;
+}
+
+/// allocVirtReg - Allocate a physical register for VirtReg.
+void RAFast::allocVirtReg(MachineInstr *MI, LiveRegEntry &LRE, unsigned Hint) {
+  const unsigned VirtReg = LRE.first;
+
+  assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
+         "Can only allocate virtual registers");
+
+  const TargetRegisterClass *RC = MRI->getRegClass(VirtReg);
+
+  // Ignore invalid hints.
+  if (Hint && (!TargetRegisterInfo::isPhysicalRegister(Hint) ||
+               !RC->contains(Hint) || !Allocatable.test(Hint)))
+    Hint = 0;
+
+  // Take hint when possible.
+  if (Hint) {
+    switch(calcSpillCost(Hint)) {
+    default:
+      definePhysReg(MI, Hint, regFree);
+      // Fall through.
+    case 0:
+      return assignVirtToPhysReg(LRE, Hint);
+    case spillImpossible:
+      break;
     }
-    return MI;
   }
 
-  // Otherwise, we need to fold it into the current instruction, or reload it.
-  // If we have registers available to hold the value, use them.
-  const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg);
-  // If we already have a PhysReg (this happens when the instruction is a
-  // reg-to-reg copy with a PhysReg destination) use that.
-  if (!PhysReg || !TargetRegisterInfo::isPhysicalRegister(PhysReg) ||
-      !isPhysRegAvailable(PhysReg))
-    PhysReg = getFreeReg(RC);
-  int FrameIndex = getStackSpaceFor(VirtReg, RC);
-
-  if (PhysReg) {   // Register is available, allocate it!
-    assignVirtToPhysReg(VirtReg, PhysReg);
-  } else {         // No registers available.
-    // Force some poor hapless value out of the register file to
-    // make room for the new register, and reload it.
-    PhysReg = getReg(MBB, MI, VirtReg, true);
+  TargetRegisterClass::iterator AOB = RC->allocation_order_begin(*MF);
+  TargetRegisterClass::iterator AOE = RC->allocation_order_end(*MF);
+
+  // First try to find a completely free register.
+  for (TargetRegisterClass::iterator I = AOB; I != AOE; ++I) {
+    unsigned PhysReg = *I;
+    if (PhysRegState[PhysReg] == regFree && !UsedInInstr.test(PhysReg))
+      return assignVirtToPhysReg(LRE, PhysReg);
   }
 
-  markVirtRegModified(VirtReg, false);   // Note that this reg was just reloaded
+  DEBUG(dbgs() << "Allocating %reg" << VirtReg << " from " << RC->getName()
+               << "\n");
+
+  unsigned BestReg = 0, BestCost = spillImpossible;
+  for (TargetRegisterClass::iterator I = AOB; I != AOE; ++I) {
+    unsigned Cost = calcSpillCost(*I);
+    // Cost is 0 when all aliases are already disabled.
+    if (Cost == 0)
+      return assignVirtToPhysReg(LRE, *I);
+    if (Cost < BestCost)
+      BestReg = *I, BestCost = Cost;
+  }
 
-  DEBUG(dbgs() << "  Reloading %reg" << VirtReg << " into "
-               << TRI->getName(PhysReg) << "\n");
+  if (BestReg) {
+    definePhysReg(MI, BestReg, regFree);
+    return assignVirtToPhysReg(LRE, BestReg);
+  }
 
-  // Add move instruction(s)
-  TII->loadRegFromStackSlot(MBB, MI, PhysReg, FrameIndex, RC);
-  ++NumLoads;    // Update statistics
-
-  MF->getRegInfo().setPhysRegUsed(PhysReg);
-  MI->getOperand(OpNum).setReg(PhysReg);  // Assign the input register
-  getVirtRegLastUse(VirtReg) = std::make_pair(MI, OpNum);
-
-  if (!ReloadedRegs.insert(PhysReg)) {
-    std::string msg;
-    raw_string_ostream Msg(msg);
-    Msg << "Ran out of registers during register allocation!";
-    if (MI->isInlineAsm()) {
-      Msg << "\nPlease check your inline asm statement for invalid "
-           << "constraints:\n";
-      MI->print(Msg, TM);
-    }
-    report_fatal_error(Msg.str());
+  // Nothing we can do.
+  std::string msg;
+  raw_string_ostream Msg(msg);
+  Msg << "Ran out of registers during register allocation!";
+  if (MI->isInlineAsm()) {
+    Msg << "\nPlease check your inline asm statement for "
+        << "invalid constraints:\n";
+    MI->print(Msg, TM);
   }
-  for (const unsigned *SubRegs = TRI->getSubRegisters(PhysReg);
-       *SubRegs; ++SubRegs) {
-    if (ReloadedRegs.insert(*SubRegs)) continue;
-
-    std::string msg;
-    raw_string_ostream Msg(msg);
-    Msg << "Ran out of registers during register allocation!";
-    if (MI->isInlineAsm()) {
-      Msg << "\nPlease check your inline asm statement for invalid "
-           << "constraints:\n";
-      MI->print(Msg, TM);
+  report_fatal_error(Msg.str());
+}
+
+/// defineVirtReg - Allocate a register for VirtReg and mark it as dirty.
+RAFast::LiveRegMap::iterator
+RAFast::defineVirtReg(MachineInstr *MI, unsigned OpNum,
+                      unsigned VirtReg, unsigned Hint) {
+  assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
+         "Not a virtual register");
+  LiveRegMap::iterator LRI;
+  bool New;
+  tie(LRI, New) = LiveVirtRegs.insert(std::make_pair(VirtReg, LiveReg()));
+  LiveReg &LR = LRI->second;
+  bool PartialRedef = MI->getOperand(OpNum).getSubReg();
+  if (New) {
+    // If there is no hint, peek at the only use of this register.
+    if ((!Hint || !TargetRegisterInfo::isPhysicalRegister(Hint)) &&
+        MRI->hasOneNonDBGUse(VirtReg)) {
+      unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
+      // It's a copy, use the destination register as a hint.
+      if (TII->isMoveInstr(*MRI->use_nodbg_begin(VirtReg),
+                           SrcReg, DstReg, SrcSubReg, DstSubReg))
+        Hint = DstReg;
+    }
+    allocVirtReg(MI, *LRI, Hint);
+    // If this is only a partial redefinition, we must reload the other parts.
+    if (PartialRedef && MI->readsVirtualRegister(VirtReg)) {
+      const TargetRegisterClass *RC = MRI->getRegClass(VirtReg);
+      int FI = getStackSpaceFor(VirtReg, RC);
+      DEBUG(dbgs() << "Reloading for partial redef: %reg" << VirtReg << "\n");
+      TII->loadRegFromStackSlot(*MBB, MI, LR.PhysReg, FI, RC, TRI);
+      ++NumLoads;
     }
-    report_fatal_error(Msg.str());
+  } else if (LR.LastUse && !PartialRedef) {
+    // Redefining a live register - kill at the last use, unless it is this
+    // instruction defining VirtReg multiple times.
+    if (LR.LastUse != MI || LR.LastUse->getOperand(LR.LastOpNum).isUse())
+      addKillFlag(LR);
   }
-
-  return MI;
+  assert(LR.PhysReg && "Register not assigned");
+  LR.LastUse = MI;
+  LR.LastOpNum = OpNum;
+  LR.Dirty = true;
+  UsedInInstr.set(LR.PhysReg);
+  return LRI;
 }
 
-/// isReadModWriteImplicitKill - True if this is an implicit kill for a
-/// read/mod/write register, i.e. update partial register.
-static bool isReadModWriteImplicitKill(MachineInstr *MI, unsigned Reg) {
-  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
-    MachineOperand &MO = MI->getOperand(i);
-    if (MO.isReg() && MO.getReg() == Reg && MO.isImplicit() &&
-        MO.isDef() && !MO.isDead())
-      return true;
+/// reloadVirtReg - Make sure VirtReg is available in a physreg and return it.
+RAFast::LiveRegMap::iterator
+RAFast::reloadVirtReg(MachineInstr *MI, unsigned OpNum,
+                      unsigned VirtReg, unsigned Hint) {
+  assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
+         "Not a virtual register");
+  LiveRegMap::iterator LRI;
+  bool New;
+  tie(LRI, New) = LiveVirtRegs.insert(std::make_pair(VirtReg, LiveReg()));
+  LiveReg &LR = LRI->second;
+  MachineOperand &MO = MI->getOperand(OpNum);
+  if (New) {
+    allocVirtReg(MI, *LRI, Hint);
+    const TargetRegisterClass *RC = MRI->getRegClass(VirtReg);
+    int FrameIndex = getStackSpaceFor(VirtReg, RC);
+    DEBUG(dbgs() << "Reloading %reg" << VirtReg << " into "
+                 << TRI->getName(LR.PhysReg) << "\n");
+    TII->loadRegFromStackSlot(*MBB, MI, LR.PhysReg, FrameIndex, RC, TRI);
+    ++NumLoads;
+  } else if (LR.Dirty) {
+    if (isLastUseOfLocalReg(MO)) {
+      DEBUG(dbgs() << "Killing last use: " << MO << "\n");
+      MO.setIsKill();
+    } else if (MO.isKill()) {
+      DEBUG(dbgs() << "Clearing dubious kill: " << MO << "\n");
+      MO.setIsKill(false);
+    }
+  } else if (MO.isKill()) {
+    // We must remove kill flags from uses of reloaded registers because the
+    // register would be killed immediately, and there might be a second use:
+    //   %foo = OR %x<kill>, %x
+    // This would cause a second reload of %x into a different register.
+    DEBUG(dbgs() << "Clearing clean kill: " << MO << "\n");
+    MO.setIsKill(false);
   }
-  return false;
+  assert(LR.PhysReg && "Register not assigned");
+  LR.LastUse = MI;
+  LR.LastOpNum = OpNum;
+  UsedInInstr.set(LR.PhysReg);
+  return LRI;
 }
 
-/// isReadModWriteImplicitDef - True if this is an implicit def for a
-/// read/mod/write register, i.e. update partial register.
-static bool isReadModWriteImplicitDef(MachineInstr *MI, unsigned Reg) {
-  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
-    MachineOperand &MO = MI->getOperand(i);
-    if (MO.isReg() && MO.getReg() == Reg && MO.isImplicit() &&
-        !MO.isDef() && MO.isKill())
-      return true;
+// setPhysReg - Change operand OpNum in MI the refer the PhysReg, considering
+// subregs. This may invalidate any operand pointers.
+// Return true if the operand kills its register.
+bool RAFast::setPhysReg(MachineInstr *MI, unsigned OpNum, unsigned PhysReg) {
+  MachineOperand &MO = MI->getOperand(OpNum);
+  if (!MO.getSubReg()) {
+    MO.setReg(PhysReg);
+    return MO.isKill() || MO.isDead();
   }
-  return false;
-}
 
-void RAFast::AllocateBasicBlock(MachineBasicBlock &MBB) {
-  // loop over each instruction
-  MachineBasicBlock::iterator MII = MBB.begin();
-
-  DEBUG({
-      const BasicBlock *LBB = MBB.getBasicBlock();
-      if (LBB)
-        dbgs() << "\nStarting RegAlloc of BB: " << LBB->getName();
-    });
-
-  // Add live-in registers as active.
-  for (MachineBasicBlock::livein_iterator I = MBB.livein_begin(),
-         E = MBB.livein_end(); I != E; ++I) {
-    unsigned Reg = *I;
-    MF->getRegInfo().setPhysRegUsed(Reg);
-    PhysRegsUsed[Reg] = 0;            // It is free and reserved now
-    for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
-         *SubRegs; ++SubRegs) {
-      if (PhysRegsUsed[*SubRegs] == -2) continue;
-      PhysRegsUsed[*SubRegs] = 0;  // It is free and reserved now
-      MF->getRegInfo().setPhysRegUsed(*SubRegs);
-    }
+  // Handle subregister index.
+  MO.setReg(PhysReg ? TRI->getSubReg(PhysReg, MO.getSubReg()) : 0);
+  MO.setSubReg(0);
+
+  // A kill flag implies killing the full register. Add corresponding super
+  // register kill.
+  if (MO.isKill()) {
+    MI->addRegisterKilled(PhysReg, TRI, true);
+    return true;
   }
+  return MO.isDead();
+}
+
+void RAFast::AllocateBasicBlock() {
+  DEBUG(dbgs() << "\nAllocating " << *MBB);
+
+  PhysRegState.assign(TRI->getNumRegs(), regDisabled);
+  assert(LiveVirtRegs.empty() && "Mapping not cleared form last block?");
+
+  MachineBasicBlock::iterator MII = MBB->begin();
+
+  // Add live-in registers as live.
+  for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(),
+         E = MBB->livein_end(); I != E; ++I)
+    definePhysReg(MII, *I, regReserved);
+
+  SmallVector<unsigned, 8> PhysECs, VirtDead;
+  SmallVector<MachineInstr*, 32> Coalesced;
 
   // Otherwise, sequentially allocate each instruction in the MBB.
-  while (MII != MBB.end()) {
+  while (MII != MBB->end()) {
     MachineInstr *MI = MII++;
     const TargetInstrDesc &TID = MI->getDesc();
     DEBUG({
-        dbgs() << "\nStarting RegAlloc of: " << *MI;
-        dbgs() << "  Regs have values: ";
-        for (unsigned i = 0; i != TRI->getNumRegs(); ++i)
-          if (PhysRegsUsed[i] != -1 && PhysRegsUsed[i] != -2)
-            dbgs() << "[" << TRI->getName(i)
-                   << ",%reg" << PhysRegsUsed[i] << "] ";
+        dbgs() << "\n>> " << *MI << "Regs:";
+        for (unsigned Reg = 1, E = TRI->getNumRegs(); Reg != E; ++Reg) {
+          if (PhysRegState[Reg] == regDisabled) continue;
+          dbgs() << " " << TRI->getName(Reg);
+          switch(PhysRegState[Reg]) {
+          case regFree:
+            break;
+          case regReserved:
+            dbgs() << "*";
+            break;
+          default:
+            dbgs() << "=%reg" << PhysRegState[Reg];
+            if (LiveVirtRegs[PhysRegState[Reg]].Dirty)
+              dbgs() << "*";
+            assert(LiveVirtRegs[PhysRegState[Reg]].PhysReg == Reg &&
+                   "Bad inverse map");
+            break;
+          }
+        }
         dbgs() << '\n';
+        // Check that LiveVirtRegs is the inverse.
+        for (LiveRegMap::iterator i = LiveVirtRegs.begin(),
+             e = LiveVirtRegs.end(); i != e; ++i) {
+           assert(TargetRegisterInfo::isVirtualRegister(i->first) &&
+                  "Bad map key");
+           assert(TargetRegisterInfo::isPhysicalRegister(i->second.PhysReg) &&
+                  "Bad map value");
+           assert(PhysRegState[i->second.PhysReg] == i->first &&
+                  "Bad inverse map");
+        }
       });
 
-    // Track registers used by instruction.
-    UsedInInstr.reset();
-
-    // Determine whether this is a copy instruction.  The cases where the
-    // source or destination are phys regs are handled specially.
-    unsigned SrcCopyReg, DstCopyReg, SrcCopySubReg, DstCopySubReg;
-    unsigned SrcCopyPhysReg = 0U;
-    bool isCopy = TII->isMoveInstr(*MI, SrcCopyReg, DstCopyReg,
-                                   SrcCopySubReg, DstCopySubReg);
-    if (isCopy && TargetRegisterInfo::isVirtualRegister(SrcCopyReg))
-      SrcCopyPhysReg = getVirt2PhysRegMapSlot(SrcCopyReg);
-
-    // Loop over the implicit uses, making sure they don't get reallocated.
-    if (TID.ImplicitUses) {
-      for (const unsigned *ImplicitUses = TID.ImplicitUses;
-           *ImplicitUses; ++ImplicitUses)
-        UsedInInstr.set(*ImplicitUses);
-    }
-
-    SmallVector<unsigned, 8> Kills;
-    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
-      MachineOperand &MO = MI->getOperand(i);
-      if (!MO.isReg() || !MO.isKill()) continue;
-
-      if (!MO.isImplicit())
-        Kills.push_back(MO.getReg());
-      else if (!isReadModWriteImplicitKill(MI, MO.getReg()))
-        // These are extra physical register kills when a sub-register
-        // is defined (def of a sub-register is a read/mod/write of the
-        // larger registers). Ignore.
-        Kills.push_back(MO.getReg());
-    }
-
-    // If any physical regs are earlyclobber, spill any value they might
-    // have in them, then mark them unallocatable.
-    // If any virtual regs are earlyclobber, allocate them now (before
-    // freeing inputs that are killed).
-    if (MI->isInlineAsm()) {
+    // Debug values are not allowed to change codegen in any way.
+    if (MI->isDebugValue()) {
       for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
         MachineOperand &MO = MI->getOperand(i);
-        if (!MO.isReg() || !MO.isDef() || !MO.isEarlyClobber() ||
-            !MO.getReg())
-          continue;
-
-        if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
-          unsigned DestVirtReg = MO.getReg();
-          unsigned DestPhysReg;
-
-          // If DestVirtReg already has a value, use it.
-          if (!(DestPhysReg = getVirt2PhysRegMapSlot(DestVirtReg)))
-            DestPhysReg = getReg(MBB, MI, DestVirtReg);
-          MF->getRegInfo().setPhysRegUsed(DestPhysReg);
-          markVirtRegModified(DestVirtReg);
-          getVirtRegLastUse(DestVirtReg) =
-                 std::make_pair((MachineInstr*)0, 0);
-          DEBUG(dbgs() << "  Assigning " << TRI->getName(DestPhysReg)
-                       << " to %reg" << DestVirtReg << "\n");
-          MO.setReg(DestPhysReg);  // Assign the earlyclobber register
-        } else {
-          unsigned Reg = MO.getReg();
-          if (PhysRegsUsed[Reg] == -2) continue;  // Something like ESP.
-          // These are extra physical register defs when a sub-register
-          // is defined (def of a sub-register is a read/mod/write of the
-          // larger registers). Ignore.
-          if (isReadModWriteImplicitDef(MI, MO.getReg())) continue;
-
-          MF->getRegInfo().setPhysRegUsed(Reg);
-          spillPhysReg(MBB, MI, Reg, true); // Spill any existing value in reg
-          PhysRegsUsed[Reg] = 0;            // It is free and reserved now
-
-          for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
-               *SubRegs; ++SubRegs) {
-            if (PhysRegsUsed[*SubRegs] == -2) continue;
-            MF->getRegInfo().setPhysRegUsed(*SubRegs);
-            PhysRegsUsed[*SubRegs] = 0;  // It is free and reserved now
-          }
-        }
+        if (!MO.isReg()) continue;
+        unsigned Reg = MO.getReg();
+        if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
+        LiveRegMap::iterator LRI = LiveVirtRegs.find(Reg);
+        if (LRI != LiveVirtRegs.end())
+          setPhysReg(MI, i, LRI->second.PhysReg);
+        else
+          MO.setReg(0); // We can't allocate a physreg for a DebugValue, sorry!
       }
+      // Next instruction.
+      continue;
     }
 
-    // If a DBG_VALUE says something is located in a spilled register,
-    // change the DBG_VALUE to be undef, which prevents the register
-    // from being reloaded here.  Doing that would change the generated
-    // code, unless another use immediately follows this instruction.
-    if (MI->isDebugValue() &&
-        MI->getNumOperands()==3 && MI->getOperand(0).isReg()) {
-      unsigned VirtReg = MI->getOperand(0).getReg();
-      if (VirtReg && TargetRegisterInfo::isVirtualRegister(VirtReg) &&
-          !getVirt2PhysRegMapSlot(VirtReg))
-        MI->getOperand(0).setReg(0U);
-    }
+    // If this is a copy, we may be able to coalesce.
+    unsigned CopySrc, CopyDst, CopySrcSub, CopyDstSub;
+    if (!TII->isMoveInstr(*MI, CopySrc, CopyDst, CopySrcSub, CopyDstSub))
+      CopySrc = CopyDst = 0;
 
-    // Get the used operands into registers.  This has the potential to spill
-    // incoming values if we are out of registers.  Note that we completely
-    // ignore physical register uses here.  We assume that if an explicit
-    // physical register is referenced by the instruction, that it is guaranteed
-    // to be live-in, or the input is badly hosed.
-    //
-    SmallSet<unsigned, 4> ReloadedRegs;
-    for (unsigned i = 0; i != MI->getNumOperands(); ++i) {
-      MachineOperand &MO = MI->getOperand(i);
-      // here we are looking for only used operands (never def&use)
-      if (MO.isReg() && !MO.isDef() && MO.getReg() && !MO.isImplicit() &&
-          TargetRegisterInfo::isVirtualRegister(MO.getReg()))
-        MI = reloadVirtReg(MBB, MI, i, ReloadedRegs,
-                           isCopy ? DstCopyReg : 0);
-    }
+    // Track registers used by instruction.
+    UsedInInstr.reset();
+    PhysECs.clear();
 
-    // If this instruction is the last user of this register, kill the
-    // value, freeing the register being used, so it doesn't need to be
-    // spilled to memory.
-    //
-    for (unsigned i = 0, e = Kills.size(); i != e; ++i) {
-      unsigned VirtReg = Kills[i];
-      unsigned PhysReg = VirtReg;
-      if (TargetRegisterInfo::isVirtualRegister(VirtReg)) {
-        // If the virtual register was never materialized into a register, it
-        // might not be in the map, but it won't hurt to zero it out anyway.
-        unsigned &PhysRegSlot = getVirt2PhysRegMapSlot(VirtReg);
-        PhysReg = PhysRegSlot;
-        PhysRegSlot = 0;
-      } else if (PhysRegsUsed[PhysReg] == -2) {
-        // Unallocatable register dead, ignore.
+    // First scan.
+    // Mark physreg uses and early clobbers as used.
+    // Find the end of the virtreg operands
+    unsigned VirtOpEnd = 0;
+    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+      MachineOperand &MO = MI->getOperand(i);
+      if (!MO.isReg()) continue;
+      unsigned Reg = MO.getReg();
+      if (!Reg) continue;
+      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+        VirtOpEnd = i+1;
         continue;
-      } else {
-        assert((!PhysRegsUsed[PhysReg] || PhysRegsUsed[PhysReg] == -1) &&
-               "Silently clearing a virtual register?");
       }
-
-      if (!PhysReg) continue;
-
-      DEBUG(dbgs() << "  Last use of " << TRI->getName(PhysReg)
-                   << "[%reg" << VirtReg <<"], removing it from live set\n");
-      removePhysReg(PhysReg);
-      for (const unsigned *SubRegs = TRI->getSubRegisters(PhysReg);
-           *SubRegs; ++SubRegs) {
-        if (PhysRegsUsed[*SubRegs] != -2) {
-          DEBUG(dbgs()  << "  Last use of "
-                        << TRI->getName(*SubRegs) << "[%reg" << VirtReg
-                        <<"], removing it from live set\n");
-          removePhysReg(*SubRegs);
-        }
+      if (!Allocatable.test(Reg)) continue;
+      if (MO.isUse()) {
+        usePhysReg(MO);
+      } else if (MO.isEarlyClobber()) {
+        definePhysReg(MI, Reg, MO.isDead() ? regFree : regReserved);
+        PhysECs.push_back(Reg);
       }
     }
 
-    // Track registers defined by instruction.
-    UsedInInstr.reset();
-
-    // Loop over all of the operands of the instruction, spilling registers that
-    // are defined, and marking explicit destinations in the PhysRegsUsed map.
-    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    // Second scan.
+    // Allocate virtreg uses and early clobbers.
+    // Collect VirtKills
+    for (unsigned i = 0; i != VirtOpEnd; ++i) {
       MachineOperand &MO = MI->getOperand(i);
-      if (!MO.isReg() || !MO.isDef() || MO.isImplicit() || !MO.getReg() ||
-          MO.isEarlyClobber() ||
-          !TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
-        continue;
-
+      if (!MO.isReg()) continue;
       unsigned Reg = MO.getReg();
-      if (PhysRegsUsed[Reg] == -2) continue;  // Something like ESP.
-      // These are extra physical register defs when a sub-register
-      // is defined (def of a sub-register is a read/mod/write of the
-      // larger registers). Ignore.
-      if (isReadModWriteImplicitDef(MI, MO.getReg())) continue;
-
-      MF->getRegInfo().setPhysRegUsed(Reg);
-      spillPhysReg(MBB, MI, Reg, true); // Spill any existing value in reg
-      PhysRegsUsed[Reg] = 0;            // It is free and reserved now
-
-      for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
-           *SubRegs; ++SubRegs) {
-        if (PhysRegsUsed[*SubRegs] == -2) continue;
-
-        MF->getRegInfo().setPhysRegUsed(*SubRegs);
-        PhysRegsUsed[*SubRegs] = 0;  // It is free and reserved now
+      if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
+      if (MO.isUse()) {
+        LiveRegMap::iterator LRI = reloadVirtReg(MI, i, Reg, CopyDst);
+        unsigned PhysReg = LRI->second.PhysReg;
+        CopySrc = (CopySrc == Reg || CopySrc == PhysReg) ? PhysReg : 0;
+        if (setPhysReg(MI, i, PhysReg))
+          killVirtReg(LRI);
+      } else if (MO.isEarlyClobber()) {
+        // Note: defineVirtReg may invalidate MO.
+        LiveRegMap::iterator LRI = defineVirtReg(MI, i, Reg, 0);
+        unsigned PhysReg = LRI->second.PhysReg;
+        setPhysReg(MI, i, PhysReg);
+        PhysECs.push_back(PhysReg);
       }
     }
 
-    // Loop over the implicit defs, spilling them as well.
-    if (TID.ImplicitDefs) {
-      for (const unsigned *ImplicitDefs = TID.ImplicitDefs;
-           *ImplicitDefs; ++ImplicitDefs) {
-        unsigned Reg = *ImplicitDefs;
-        if (PhysRegsUsed[Reg] != -2) {
-          spillPhysReg(MBB, MI, Reg, true);
-          PhysRegsUsed[Reg] = 0;            // It is free and reserved now
-        }
-        MF->getRegInfo().setPhysRegUsed(Reg);
-        for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
-             *SubRegs; ++SubRegs) {
-          if (PhysRegsUsed[*SubRegs] == -2) continue;
+    MRI->addPhysRegsUsed(UsedInInstr);
 
-          PhysRegsUsed[*SubRegs] = 0;  // It is free and reserved now
-          MF->getRegInfo().setPhysRegUsed(*SubRegs);
-        }
-      }
+    // Track registers defined by instruction - early clobbers at this point.
+    UsedInInstr.reset();
+    for (unsigned i = 0, e = PhysECs.size(); i != e; ++i) {
+      unsigned PhysReg = PhysECs[i];
+      UsedInInstr.set(PhysReg);
+      for (const unsigned *AS = TRI->getAliasSet(PhysReg);
+            unsigned Alias = *AS; ++AS)
+        UsedInInstr.set(Alias);
     }
 
-    SmallVector<unsigned, 8> DeadDefs;
-    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
-      MachineOperand &MO = MI->getOperand(i);
-      if (MO.isReg() && MO.isDead())
-        DeadDefs.push_back(MO.getReg());
+    unsigned DefOpEnd = MI->getNumOperands();
+    if (TID.isCall()) {
+      // Spill all virtregs before a call. This serves two purposes: 1. If an
+      // exception is thrown, the landing pad is going to expect to find registers
+      // in their spill slots, and 2. we don't have to wade through all the
+      // <imp-def> operands on the call instruction.
+      DefOpEnd = VirtOpEnd;
+      DEBUG(dbgs() << "  Spilling remaining registers before call.\n");
+      spillAll(MI);
     }
 
-    // Okay, we have allocated all of the source operands and spilled any values
-    // that would be destroyed by defs of this instruction.  Loop over the
-    // explicit defs and assign them to a register, spilling incoming values if
-    // we need to scavenge a register.
-    //
-    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    // Third scan.
+    // Allocate defs and collect dead defs.
+    for (unsigned i = 0; i != DefOpEnd; ++i) {
       MachineOperand &MO = MI->getOperand(i);
-      if (!MO.isReg() || !MO.isDef() || !MO.getReg() ||
-          MO.isEarlyClobber() ||
-          !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
-        continue;
-
-      unsigned DestVirtReg = MO.getReg();
-      unsigned DestPhysReg;
-
-      // If DestVirtReg already has a value, use it.
-      if (!(DestPhysReg = getVirt2PhysRegMapSlot(DestVirtReg))) {
-        // If this is a copy try to reuse the input as the output;
-        // that will make the copy go away.
-        // If this is a copy, the source reg is a phys reg, and
-        // that reg is available, use that phys reg for DestPhysReg.
-        // If this is a copy, the source reg is a virtual reg, and
-        // the phys reg that was assigned to that virtual reg is now
-        // available, use that phys reg for DestPhysReg.  (If it's now
-        // available that means this was the last use of the source.)
-        if (isCopy &&
-            TargetRegisterInfo::isPhysicalRegister(SrcCopyReg) &&
-            isPhysRegAvailable(SrcCopyReg)) {
-          DestPhysReg = SrcCopyReg;
-          assignVirtToPhysReg(DestVirtReg, DestPhysReg);
-        } else if (isCopy &&
-            TargetRegisterInfo::isVirtualRegister(SrcCopyReg) &&
-            SrcCopyPhysReg && isPhysRegAvailable(SrcCopyPhysReg) &&
-            MF->getRegInfo().getRegClass(DestVirtReg)->
-                             contains(SrcCopyPhysReg)) {
-          DestPhysReg = SrcCopyPhysReg;
-          assignVirtToPhysReg(DestVirtReg, DestPhysReg);
-        } else
-          DestPhysReg = getReg(MBB, MI, DestVirtReg);
-      }
-      MF->getRegInfo().setPhysRegUsed(DestPhysReg);
-      markVirtRegModified(DestVirtReg);
-      getVirtRegLastUse(DestVirtReg) = std::make_pair((MachineInstr*)0, 0);
-      DEBUG(dbgs() << "  Assigning " << TRI->getName(DestPhysReg)
-                   << " to %reg" << DestVirtReg << "\n");
-      MO.setReg(DestPhysReg);  // Assign the output register
-      UsedInInstr.set(DestPhysReg);
-    }
+      if (!MO.isReg() || !MO.isDef() || !MO.getReg()) continue;
+      unsigned Reg = MO.getReg();
 
-    // If this instruction defines any registers that are immediately dead,
-    // kill them now.
-    //
-    for (unsigned i = 0, e = DeadDefs.size(); i != e; ++i) {
-      unsigned VirtReg = DeadDefs[i];
-      unsigned PhysReg = VirtReg;
-      if (TargetRegisterInfo::isVirtualRegister(VirtReg)) {
-        unsigned &PhysRegSlot = getVirt2PhysRegMapSlot(VirtReg);
-        PhysReg = PhysRegSlot;
-        assert(PhysReg != 0);
-        PhysRegSlot = 0;
-      } else if (PhysRegsUsed[PhysReg] == -2) {
-        // Unallocatable register dead, ignore.
-        continue;
-      } else if (!PhysReg)
+      if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+        if (!Allocatable.test(Reg)) continue;
+        definePhysReg(MI, Reg, (MO.isImplicit() || MO.isDead()) ?
+                               regFree : regReserved);
         continue;
-
-      DEBUG(dbgs()  << "  Register " << TRI->getName(PhysReg)
-                    << " [%reg" << VirtReg
-                    << "] is never used, removing it from live set\n");
-      removePhysReg(PhysReg);
-      for (const unsigned *AliasSet = TRI->getAliasSet(PhysReg);
-           *AliasSet; ++AliasSet) {
-        if (PhysRegsUsed[*AliasSet] != -2) {
-          DEBUG(dbgs()  << "  Register " << TRI->getName(*AliasSet)
-                        << " [%reg" << *AliasSet
-                        << "] is never used, removing it from live set\n");
-          removePhysReg(*AliasSet);
-        }
       }
+      LiveRegMap::iterator LRI = defineVirtReg(MI, i, Reg, CopySrc);
+      unsigned PhysReg = LRI->second.PhysReg;
+      if (setPhysReg(MI, i, PhysReg)) {
+        VirtDead.push_back(Reg);
+        CopyDst = 0; // cancel coalescing;
+      } else
+        CopyDst = (CopyDst == Reg || CopyDst == PhysReg) ? PhysReg : 0;
     }
 
-    // Finally, if this is a noop copy instruction, zap it.  (Except that if
-    // the copy is dead, it must be kept to avoid messing up liveness info for
-    // the register scavenger.  See pr4100.)
-    if (TII->isMoveInstr(*MI, SrcCopyReg, DstCopyReg,
-                         SrcCopySubReg, DstCopySubReg) &&
-        SrcCopyReg == DstCopyReg && DeadDefs.empty())
-      MBB.erase(MI);
+    // Kill dead defs after the scan to ensure that multiple defs of the same
+    // register are allocated identically. We didn't need to do this for uses
+    // because we are crerating our own kill flags, and they are always at the
+    // last use.
+    for (unsigned i = 0, e = VirtDead.size(); i != e; ++i)
+      killVirtReg(VirtDead[i]);
+    VirtDead.clear();
+
+    MRI->addPhysRegsUsed(UsedInInstr);
+
+    if (CopyDst && CopyDst == CopySrc && CopyDstSub == CopySrcSub) {
+      DEBUG(dbgs() << "-- coalescing: " << *MI);
+      Coalesced.push_back(MI);
+    } else {
+      DEBUG(dbgs() << "<< " << *MI);
+    }
   }
 
-  MachineBasicBlock::iterator MI = MBB.getFirstTerminator();
-
   // Spill all physical registers holding virtual registers now.
-  for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i)
-    if (PhysRegsUsed[i] != -1 && PhysRegsUsed[i] != -2) {
-      if (unsigned VirtReg = PhysRegsUsed[i])
-        spillVirtReg(MBB, MI, VirtReg, i);
-      else
-        removePhysReg(i);
-    }
+  DEBUG(dbgs() << "Spilling live registers at end of block.\n");
+  spillAll(MBB->getFirstTerminator());
+
+  // Erase all the coalesced copies. We are delaying it until now because
+  // LiveVirtRegs might refer to the instrs.
+  for (unsigned i = 0, e = Coalesced.size(); i != e; ++i)
+    MBB->erase(Coalesced[i]);
+  NumCopies += Coalesced.size();
+
+  DEBUG(MBB->dump());
 }
 
 /// runOnMachineFunction - Register allocate the whole function
 ///
 bool RAFast::runOnMachineFunction(MachineFunction &Fn) {
-  DEBUG(dbgs() << "Machine Function\n");
+  DEBUG(dbgs() << "********** FAST REGISTER ALLOCATION **********\n"
+               << "********** Function: "
+               << ((Value*)Fn.getFunction())->getName() << '\n');
   MF = &Fn;
+  MRI = &MF->getRegInfo();
   TM = &Fn.getTarget();
   TRI = TM->getRegisterInfo();
   TII = TM->getInstrInfo();
 
-  PhysRegsUsed.assign(TRI->getNumRegs(), -1);
   UsedInInstr.resize(TRI->getNumRegs());
-
-  // At various places we want to efficiently check to see whether a register
-  // is allocatable.  To handle this, we mark all unallocatable registers as
-  // being pinned down, permanently.
-  {
-    BitVector Allocable = TRI->getAllocatableSet(Fn);
-    for (unsigned i = 0, e = Allocable.size(); i != e; ++i)
-      if (!Allocable[i])
-        PhysRegsUsed[i] = -2;  // Mark the reg unallocable.
-  }
+  Allocatable = TRI->getAllocatableSet(*MF);
 
   // initialize the virtual->physical register map to have a 'null'
   // mapping for all virtual registers
-  unsigned LastVirtReg = MF->getRegInfo().getLastVirtReg();
+  unsigned LastVirtReg = MRI->getLastVirtReg();
   StackSlotForVirtReg.grow(LastVirtReg);
-  Virt2PhysRegMap.grow(LastVirtReg);
-  Virt2LastUseMap.grow(LastVirtReg);
-  VirtRegModified.resize(LastVirtReg+1 -
-                         TargetRegisterInfo::FirstVirtualRegister);
-  UsedInMultipleBlocks.resize(LastVirtReg+1 -
-                              TargetRegisterInfo::FirstVirtualRegister);
 
   // Loop over all of the basic blocks, eliminating virtual register references
-  for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
-       MBB != MBBe; ++MBB)
-    AllocateBasicBlock(*MBB);
+  for (MachineFunction::iterator MBBi = Fn.begin(), MBBe = Fn.end();
+       MBBi != MBBe; ++MBBi) {
+    MBB = &*MBBi;
+    AllocateBasicBlock();
+  }
+
+  // Make sure the set of used physregs is closed under subreg operations.
+  MRI->closePhysRegsUsed(*TRI);
 
   StackSlotForVirtReg.clear();
-  PhysRegsUsed.clear();
-  VirtRegModified.clear();
-  UsedInMultipleBlocks.clear();
-  Virt2PhysRegMap.clear();
-  Virt2LastUseMap.clear();
   return true;
 }
 
diff --git a/lib/CodeGen/RegAllocLinearScan.cpp b/lib/CodeGen/RegAllocLinearScan.cpp
index 6c8fc0c..bc331f0 100644
--- a/lib/CodeGen/RegAllocLinearScan.cpp
+++ b/lib/CodeGen/RegAllocLinearScan.cpp
@@ -809,7 +809,7 @@ float getConflictWeight(LiveInterval *cur, unsigned Reg, LiveIntervals *li_,
     MachineInstr *MI = &*I;
     if (cur->liveAt(li_->getInstructionIndex(MI))) {
       unsigned loopDepth = loopInfo->getLoopDepth(MI->getParent());
-      Conflicts += powf(10.0f, (float)loopDepth);
+      Conflicts += std::pow(10.0f, (float)loopDepth);
     }
   }
   return Conflicts;
diff --git a/lib/CodeGen/RegAllocLocal.cpp b/lib/CodeGen/RegAllocLocal.cpp
index 94456d1..321ae12 100644
--- a/lib/CodeGen/RegAllocLocal.cpp
+++ b/lib/CodeGen/RegAllocLocal.cpp
@@ -37,6 +37,7 @@ using namespace llvm;
 
 STATISTIC(NumStores, "Number of stores added");
 STATISTIC(NumLoads , "Number of loads added");
+STATISTIC(NumCopies, "Number of copies coalesced");
 
 static RegisterRegAlloc
   localRegAlloc("local", "local register allocator",
@@ -50,6 +51,7 @@ namespace {
   private:
     const TargetMachine *TM;
     MachineFunction *MF;
+    MachineRegisterInfo *MRI;
     const TargetRegisterInfo *TRI;
     const TargetInstrInfo *TII;
 
@@ -297,8 +299,18 @@ void RALocal::storeVirtReg(MachineBasicBlock &MBB,
   const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg);
   int FrameIndex = getStackSpaceFor(VirtReg, RC);
   DEBUG(dbgs() << " to stack slot #" << FrameIndex);
-  TII->storeRegToStackSlot(MBB, I, PhysReg, isKill, FrameIndex, RC);
+  TII->storeRegToStackSlot(MBB, I, PhysReg, isKill, FrameIndex, RC, TRI);
   ++NumStores;   // Update statistics
+
+  // Mark the spill instruction as last use if we're not killing the register.
+  if (!isKill) {
+    MachineInstr *Spill = llvm::prior(I);
+    int OpNum = Spill->findRegisterUseOperandIdx(PhysReg);
+    if (OpNum < 0)
+      getVirtRegLastUse(VirtReg) = std::make_pair((MachineInstr*)0, 0);
+    else
+      getVirtRegLastUse(VirtReg) = std::make_pair(Spill, OpNum);
+  }
 }
 
 /// spillVirtReg - This method spills the value specified by PhysReg into the
@@ -506,10 +518,15 @@ MachineInstr *RALocal::reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI,
                                      SmallSet<unsigned, 4> &ReloadedRegs,
                                      unsigned PhysReg) {
   unsigned VirtReg = MI->getOperand(OpNum).getReg();
+  unsigned SubIdx = MI->getOperand(OpNum).getSubReg();
 
   // If the virtual register is already available, just update the instruction
   // and return.
   if (unsigned PR = getVirt2PhysRegMapSlot(VirtReg)) {
+    if (SubIdx) {
+      PR = TRI->getSubReg(PR, SubIdx);
+      MI->getOperand(OpNum).setSubReg(0);
+    }
     MI->getOperand(OpNum).setReg(PR);  // Assign the input register
     if (!MI->isDebugValue()) {
       // Do not do these for DBG_VALUE as they can affect codegen.
@@ -543,11 +560,16 @@ MachineInstr *RALocal::reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI,
                << TRI->getName(PhysReg) << "\n");
 
   // Add move instruction(s)
-  TII->loadRegFromStackSlot(MBB, MI, PhysReg, FrameIndex, RC);
+  TII->loadRegFromStackSlot(MBB, MI, PhysReg, FrameIndex, RC, TRI);
   ++NumLoads;    // Update statistics
 
   MF->getRegInfo().setPhysRegUsed(PhysReg);
-  MI->getOperand(OpNum).setReg(PhysReg);  // Assign the input register
+  // Assign the input register.
+  if (SubIdx) {
+    MI->getOperand(OpNum).setSubReg(0);
+    MI->getOperand(OpNum).setReg(TRI->getSubReg(PhysReg, SubIdx));
+  } else
+    MI->getOperand(OpNum).setReg(PhysReg);  // Assign the input register
   getVirtRegLastUse(VirtReg) = std::make_pair(MI, OpNum);
 
   if (!ReloadedRegs.insert(PhysReg)) {
@@ -626,7 +648,6 @@ static bool precedes(MachineBasicBlock::iterator A,
 /// ComputeLocalLiveness - Computes liveness of registers within a basic
 /// block, setting the killed/dead flags as appropriate.
 void RALocal::ComputeLocalLiveness(MachineBasicBlock& MBB) {
-  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
   // Keep track of the most recently seen previous use or def of each reg, 
   // so that we can update them with dead/kill markers.
   DenseMap<unsigned, std::pair<MachineInstr*, unsigned> > LastUseDef;
@@ -672,18 +693,26 @@ void RALocal::ComputeLocalLiveness(MachineBasicBlock& MBB) {
       //   - A def followed by a def is dead
       //   - A use followed by a def is a kill
       if (!MO.isReg() || !MO.getReg() || !MO.isDef()) continue;
-      
+
+      unsigned SubIdx = MO.getSubReg();
       DenseMap<unsigned, std::pair<MachineInstr*, unsigned> >::iterator
         last = LastUseDef.find(MO.getReg());
       if (last != LastUseDef.end()) {
         // Check if this is a two address instruction.  If so, then
         // the def does not kill the use.
-        if (last->second.first == I &&
-            I->isRegTiedToUseOperand(i))
+        if (last->second.first == I && I->isRegTiedToUseOperand(i))
           continue;
         
         MachineOperand &lastUD =
                     last->second.first->getOperand(last->second.second);
+        if (SubIdx && lastUD.getSubReg() != SubIdx)
+          // Partial re-def, the last def is not dead.
+          // %reg1024:5<def> =
+          // %reg1024:6<def> =
+          // or 
+          // %reg1024:5<def> = op %reg1024, 5
+          continue;
+
         if (lastUD.isDef())
           lastUD.setIsDead(true);
         else
@@ -732,8 +761,8 @@ void RALocal::ComputeLocalLiveness(MachineBasicBlock& MBB) {
       // it wouldn't have been otherwise.  Nullify the DBG_VALUEs when that
       // happens.
       bool UsedByDebugValueOnly = false;
-      for (MachineRegisterInfo::reg_iterator UI = MRI.reg_begin(MO.getReg()),
-             UE = MRI.reg_end(); UI != UE; ++UI) {
+      for (MachineRegisterInfo::reg_iterator UI = MRI->reg_begin(MO.getReg()),
+             UE = MRI->reg_end(); UI != UE; ++UI) {
         // Two cases:
         // - used in another block
         // - used in the same block before it is defined (loop)
@@ -755,8 +784,8 @@ void RALocal::ComputeLocalLiveness(MachineBasicBlock& MBB) {
       }
 
       if (UsedByDebugValueOnly)
-        for (MachineRegisterInfo::reg_iterator UI = MRI.reg_begin(MO.getReg()),
-             UE = MRI.reg_end(); UI != UE; ++UI)
+        for (MachineRegisterInfo::reg_iterator UI = MRI->reg_begin(MO.getReg()),
+             UE = MRI->reg_end(); UI != UE; ++UI)
           if (UI->isDebugValue() &&
               (UI->getParent() != &MBB ||
                (MO.isDef() && precedes(&*UI, MI))))
@@ -828,7 +857,8 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) {
     unsigned SrcCopyReg, DstCopyReg, SrcCopySubReg, DstCopySubReg;
     unsigned SrcCopyPhysReg = 0U;
     bool isCopy = TII->isMoveInstr(*MI, SrcCopyReg, DstCopyReg, 
-                                   SrcCopySubReg, DstCopySubReg);
+                                   SrcCopySubReg, DstCopySubReg) &&
+      SrcCopySubReg == DstCopySubReg;
     if (isCopy && TargetRegisterInfo::isVirtualRegister(SrcCopyReg))
       SrcCopyPhysReg = getVirt2PhysRegMapSlot(SrcCopyReg);
 
@@ -878,6 +908,10 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) {
                  std::make_pair((MachineInstr*)0, 0);
           DEBUG(dbgs() << "  Assigning " << TRI->getName(DestPhysReg)
                        << " to %reg" << DestVirtReg << "\n");
+          if (unsigned DestSubIdx = MO.getSubReg()) {
+            MO.setSubReg(0);
+            DestPhysReg = TRI->getSubReg(DestPhysReg, DestSubIdx);
+          }
           MO.setReg(DestPhysReg);  // Assign the earlyclobber register
         } else {
           unsigned Reg = MO.getReg();
@@ -1073,6 +1107,11 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) {
       getVirtRegLastUse(DestVirtReg) = std::make_pair((MachineInstr*)0, 0);
       DEBUG(dbgs() << "  Assigning " << TRI->getName(DestPhysReg)
                    << " to %reg" << DestVirtReg << "\n");
+
+      if (unsigned DestSubIdx = MO.getSubReg()) {
+        MO.setSubReg(0);
+        DestPhysReg = TRI->getSubReg(DestPhysReg, DestSubIdx);
+      }
       MO.setReg(DestPhysReg);  // Assign the output register
     }
 
@@ -1127,8 +1166,11 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) {
     // the register scavenger.  See pr4100.)
     if (TII->isMoveInstr(*MI, SrcCopyReg, DstCopyReg,
                          SrcCopySubReg, DstCopySubReg) &&
-        SrcCopyReg == DstCopyReg && DeadDefs.empty())
+        SrcCopyReg == DstCopyReg && SrcCopySubReg == DstCopySubReg &&
+        DeadDefs.empty()) {
+      ++NumCopies;
       MBB.erase(MI);
+    }
   }
 
   MachineBasicBlock::iterator MI = MBB.getFirstTerminator();
@@ -1165,6 +1207,7 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) {
 bool RALocal::runOnMachineFunction(MachineFunction &Fn) {
   DEBUG(dbgs() << "Machine Function\n");
   MF = &Fn;
+  MRI = &Fn.getRegInfo();
   TM = &Fn.getTarget();
   TRI = TM->getRegisterInfo();
   TII = TM->getInstrInfo();
diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp
index 81cfd8f..4fafd28 100644
--- a/lib/CodeGen/RegAllocPBQP.cpp
+++ b/lib/CodeGen/RegAllocPBQP.cpp
@@ -489,7 +489,7 @@ PBQPRegAlloc::CoalesceMap PBQPRegAlloc::findCoalesces() {
       // did, but none of their definitions would prevent us from coalescing.
       // We're good to go with the coalesce.
 
-      float cBenefit = powf(10.0f, loopInfo->getLoopDepth(mbb)) / 5.0;
+      float cBenefit = std::pow(10.0f, (float)loopInfo->getLoopDepth(mbb)) / 5.0;
 
       coalescesFound[RegPair(srcReg, dstReg)] = cBenefit;
       coalescesFound[RegPair(dstReg, srcReg)] = cBenefit;
diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp
index 179984f..690e59f 100644
--- a/lib/CodeGen/RegisterScavenging.cpp
+++ b/lib/CodeGen/RegisterScavenging.cpp
@@ -343,12 +343,12 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
     // Spill the scavenged register before I.
     assert(ScavengingFrameIndex >= 0 &&
            "Cannot scavenge register without an emergency spill slot!");
-    TII->storeRegToStackSlot(*MBB, I, SReg, true, ScavengingFrameIndex, RC);
+    TII->storeRegToStackSlot(*MBB, I, SReg, true, ScavengingFrameIndex, RC,TRI);
     MachineBasicBlock::iterator II = prior(I);
     TRI->eliminateFrameIndex(II, SPAdj, NULL, this);
 
     // Restore the scavenged register before its use (or first terminator).
-    TII->loadRegFromStackSlot(*MBB, UseMI, SReg, ScavengingFrameIndex, RC);
+    TII->loadRegFromStackSlot(*MBB, UseMI, SReg, ScavengingFrameIndex, RC, TRI);
     II = prior(UseMI);
     TRI->eliminateFrameIndex(II, SPAdj, NULL, this);
   }
diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp
index 587f001..da20c12 100644
--- a/lib/CodeGen/ScheduleDAG.cpp
+++ b/lib/CodeGen/ScheduleDAG.cpp
@@ -27,7 +27,6 @@ ScheduleDAG::ScheduleDAG(MachineFunction &mf)
   : TM(mf.getTarget()),
     TII(TM.getInstrInfo()),
     TRI(TM.getRegisterInfo()),
-    TLI(TM.getTargetLowering()),
     MF(mf), MRI(mf.getRegInfo()),
     EntrySU(), ExitSU() {
 }
diff --git a/lib/CodeGen/ScheduleDAGEmit.cpp b/lib/CodeGen/ScheduleDAGEmit.cpp
index 8e03420..ee08e1d 100644
--- a/lib/CodeGen/ScheduleDAGEmit.cpp
+++ b/lib/CodeGen/ScheduleDAGEmit.cpp
@@ -51,7 +51,8 @@ void ScheduleDAG::EmitPhysRegCopy(SUnit *SU,
         }
       }
       bool Success = TII->copyRegToReg(*BB, InsertPos, Reg, VRI->second,
-                                       SU->CopyDstRC, SU->CopySrcRC);
+                                       SU->CopyDstRC, SU->CopySrcRC,
+                                       DebugLoc());
       (void)Success;
       assert(Success && "copyRegToReg failed!");
     } else {
@@ -62,7 +63,8 @@ void ScheduleDAG::EmitPhysRegCopy(SUnit *SU,
       isNew = isNew; // Silence compiler warning.
       assert(isNew && "Node emitted out of order - early");
       bool Success = TII->copyRegToReg(*BB, InsertPos, VRBase, I->getReg(),
-                                       SU->CopyDstRC, SU->CopySrcRC);
+                                       SU->CopyDstRC, SU->CopySrcRC,
+                                       DebugLoc());
       (void)Success;
       assert(Success && "copyRegToReg failed!");
     }
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index ca235c3..09202f8 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -210,7 +210,7 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
       assert(TRI->isPhysicalRegister(Reg) && "Virtual register encountered!");
 
       if (MO.isDef() && DanglingDebugValue[Reg].first!=0) {
-        SU->setDbgInstr(DanglingDebugValue[Reg].first);
+        SU->DbgInstrList.push_back(DanglingDebugValue[Reg].first);
         DbgValueVec[DanglingDebugValue[Reg].second] = 0;
         DanglingDebugValue[Reg] = std::make_pair((MachineInstr*)0, 0);
       }
@@ -599,8 +599,8 @@ MachineBasicBlock *ScheduleDAGInstrs::EmitSchedule() {
     }
 
     BB->insert(InsertPos, SU->getInstr());
-    if (SU->getDbgInstr())
-      BB->insert(InsertPos, SU->getDbgInstr());
+    for (unsigned i = 0, e = SU->DbgInstrList.size() ; i < e ; ++i)
+      BB->insert(InsertPos, SU->DbgInstrList[i]);
   }
 
   // Update the Begin iterator, as the first instruction in the block
diff --git a/lib/CodeGen/ScheduleDAGInstrs.h b/lib/CodeGen/ScheduleDAGInstrs.h
index d70608f..ad82db2 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.h
+++ b/lib/CodeGen/ScheduleDAGInstrs.h
@@ -32,7 +32,7 @@ namespace llvm {
   /// For example, loop induction variable increments should be
   /// scheduled as soon as possible after the variable's last use.
   ///
-  class VISIBILITY_HIDDEN LoopDependencies {
+  class LLVM_LIBRARY_VISIBILITY LoopDependencies {
     const MachineLoopInfo &MLI;
     const MachineDominatorTree &MDT;
 
@@ -94,7 +94,7 @@ namespace llvm {
 
   /// ScheduleDAGInstrs - A ScheduleDAG subclass for scheduling lists of
   /// MachineInstrs.
-  class VISIBILITY_HIDDEN ScheduleDAGInstrs : public ScheduleDAG {
+  class LLVM_LIBRARY_VISIBILITY ScheduleDAGInstrs : public ScheduleDAG {
     const MachineLoopInfo &MLI;
     const MachineDominatorTree &MDT;
     const MachineFrameInfo *MFI;
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 3639f80..6bddd78 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -760,12 +760,18 @@ SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
 
     bool Replace1 = false;
     SDValue N1 = Op.getOperand(1);
-    SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
-    if (NN1.getNode() == 0)
-      return SDValue();
+    SDValue NN1;
+    if (N0 == N1)
+      NN1 = NN0;
+    else {
+      NN1 = PromoteOperand(N1, PVT, Replace1);
+      if (NN1.getNode() == 0)
+        return SDValue();
+    }
 
     AddToWorkList(NN0.getNode());
-    AddToWorkList(NN1.getNode());
+    if (NN1.getNode())
+      AddToWorkList(NN1.getNode());
 
     if (Replace0)
       ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
@@ -3425,8 +3431,12 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
     // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
     SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
     if (NarrowLoad.getNode()) {
-      if (NarrowLoad.getNode() != N0.getNode())
+      SDNode* oye = N0.getNode()->getOperand(0).getNode();
+      if (NarrowLoad.getNode() != N0.getNode()) {
         CombineTo(N0.getNode(), NarrowLoad);
+        // CombineTo deleted the truncate, if needed, but not what's under it.
+        AddToWorkList(oye);
+      }
       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
     }
 
@@ -3564,7 +3574,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
 	  DAG.getVSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0),
 			N0.getOperand(1),
 			cast<CondCodeSDNode>(N0.getOperand(2))->get());
-	return 	DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(),  VT);
+	return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT);
       }
     }
 
@@ -3585,9 +3595,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
                                       N0.getOperand(0), N0.getOperand(1),
                                  cast<CondCodeSDNode>(N0.getOperand(2))->get()),
                          NegOne, DAG.getConstant(0, VT));
-  }
-  
-  
+  }  
 
   // fold (sext x) -> (zext x) if the sign bit is known zero.
   if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
@@ -3615,8 +3623,12 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
   if (N0.getOpcode() == ISD::TRUNCATE) {
     SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
     if (NarrowLoad.getNode()) {
-      if (NarrowLoad.getNode() != N0.getNode())
+      SDNode* oye = N0.getNode()->getOperand(0).getNode();
+      if (NarrowLoad.getNode() != N0.getNode()) {
         CombineTo(N0.getNode(), NarrowLoad);
+        // CombineTo deleted the truncate, if needed, but not what's under it.
+        AddToWorkList(oye);
+      }
       return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, NarrowLoad);
     }
   }
@@ -3726,8 +3738,48 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
     }
   }
 
-  // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
   if (N0.getOpcode() == ISD::SETCC) {
+    if (!LegalOperations && VT.isVector()) {
+      // zext(setcc) -> (and (vsetcc), (1, 1, ...) for vectors.
+      // Only do this before legalize for now.
+      EVT N0VT = N0.getOperand(0).getValueType();
+      EVT EltVT = VT.getVectorElementType();
+      SmallVector<SDValue,8> OneOps(VT.getVectorNumElements(),
+                                    DAG.getConstant(1, EltVT));
+      if (VT.getSizeInBits() == N0VT.getSizeInBits()) {
+        // We know that the # elements of the results is the same as the
+        // # elements of the compare (and the # elements of the compare result
+        // for that matter).  Check to see that they are the same size.  If so,
+        // we know that the element size of the sext'd result matches the
+        // element size of the compare operands.
+        return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
+                           DAG.getVSetCC(N->getDebugLoc(), VT, N0.getOperand(0),
+                                         N0.getOperand(1),
+                                 cast<CondCodeSDNode>(N0.getOperand(2))->get()),
+                           DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT,
+                                       &OneOps[0], OneOps.size()));
+      } else {
+        // If the desired elements are smaller or larger than the source
+        // elements we can use a matching integer vector type and then
+        // truncate/sign extend
+        EVT MatchingElementType =
+          EVT::getIntegerVT(*DAG.getContext(),
+                            N0VT.getScalarType().getSizeInBits());
+        EVT MatchingVectorType =
+          EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
+                           N0VT.getVectorNumElements());
+        SDValue VsetCC =
+          DAG.getVSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0),
+                        N0.getOperand(1),
+                        cast<CondCodeSDNode>(N0.getOperand(2))->get());
+        return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
+                           DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT),
+                           DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT,
+                                       &OneOps[0], OneOps.size()));
+      }
+    }
+
+    // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
     SDValue SCC =
       SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1),
                        DAG.getConstant(1, VT), DAG.getConstant(0, VT),
@@ -3780,8 +3832,12 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
   if (N0.getOpcode() == ISD::TRUNCATE) {
     SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
     if (NarrowLoad.getNode()) {
-      if (NarrowLoad.getNode() != N0.getNode())
+      SDNode* oye = N0.getNode()->getOperand(0).getNode();
+      if (NarrowLoad.getNode() != N0.getNode()) {
         CombineTo(N0.getNode(), NarrowLoad);
+        // CombineTo deleted the truncate, if needed, but not what's under it.
+        AddToWorkList(oye);
+      }
       return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, NarrowLoad);
     }
   }
@@ -3883,8 +3939,39 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
   }
 
-  // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
   if (N0.getOpcode() == ISD::SETCC) {
+    // aext(setcc) -> sext_in_reg(vsetcc) for vectors.
+    // Only do this before legalize for now.
+    if (VT.isVector() && !LegalOperations) {
+      EVT N0VT = N0.getOperand(0).getValueType();
+        // We know that the # elements of the results is the same as the
+        // # elements of the compare (and the # elements of the compare result
+        // for that matter).  Check to see that they are the same size.  If so,
+        // we know that the element size of the sext'd result matches the
+        // element size of the compare operands.
+      if (VT.getSizeInBits() == N0VT.getSizeInBits())
+	return DAG.getVSetCC(N->getDebugLoc(), VT, N0.getOperand(0),
+			     N0.getOperand(1),
+			     cast<CondCodeSDNode>(N0.getOperand(2))->get());
+      // If the desired elements are smaller or larger than the source
+      // elements we can use a matching integer vector type and then
+      // truncate/sign extend
+      else {
+	EVT MatchingElementType =
+	  EVT::getIntegerVT(*DAG.getContext(),
+			    N0VT.getScalarType().getSizeInBits());
+	EVT MatchingVectorType =
+	  EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
+			   N0VT.getVectorNumElements());
+	SDValue VsetCC =
+	  DAG.getVSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0),
+			N0.getOperand(1),
+			cast<CondCodeSDNode>(N0.getOperand(2))->get());
+	return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT);
+      }
+    }
+
+    // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
     SDValue SCC =
       SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1),
                        DAG.getConstant(1, VT), DAG.getConstant(0, VT),
@@ -5278,10 +5365,6 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
     SDValue Offset;
     ISD::MemIndexedMode AM = ISD::UNINDEXED;
     if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
-      if (Ptr == Offset && Op->getOpcode() == ISD::ADD)
-        std::swap(BasePtr, Offset);
-      if (Ptr != BasePtr)
-        continue;
       // Don't create a indexed load / store with zero offset.
       if (isa<ConstantSDNode>(Offset) &&
           cast<ConstantSDNode>(Offset)->isNullValue())
@@ -5953,6 +6036,10 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
   SDValue InVal = N->getOperand(1);
   SDValue EltNo = N->getOperand(2);
 
+  // If the inserted element is an UNDEF, just use the input vector.
+  if (InVal.getOpcode() == ISD::UNDEF)
+    return InVec;
+
   // If the invec is a BUILD_VECTOR and if EltNo is a constant, build a new
   // vector with the inserted element.
   if (InVec.getOpcode() == ISD::BUILD_VECTOR && isa<ConstantSDNode>(EltNo)) {
@@ -6206,7 +6293,6 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
   // all scalar elements the same.
   if (cast<ShuffleVectorSDNode>(N)->isSplat()) {
     SDNode *V = N0.getNode();
-    
 
     // If this is a bit convert that changes the element type of the vector but
     // not the number of vector elements, look through it.  Be careful not to
@@ -6338,13 +6424,21 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
           break;
       }
 
-      Ops.push_back(DAG.getNode(N->getOpcode(), LHS.getDebugLoc(),
-                                EltType, LHSOp, RHSOp));
-      AddToWorkList(Ops.back().getNode());
-      assert((Ops.back().getOpcode() == ISD::UNDEF ||
-              Ops.back().getOpcode() == ISD::Constant ||
-              Ops.back().getOpcode() == ISD::ConstantFP) &&
-             "Scalar binop didn't fold!");
+      // If the vector element type is not legal, the BUILD_VECTOR operands
+      // are promoted and implicitly truncated.  Make that explicit here.
+      if (LHSOp.getValueType() != EltType)
+        LHSOp = DAG.getNode(ISD::TRUNCATE, LHS.getDebugLoc(), EltType, LHSOp);
+      if (RHSOp.getValueType() != EltType)
+        RHSOp = DAG.getNode(ISD::TRUNCATE, RHS.getDebugLoc(), EltType, RHSOp);
+
+      SDValue FoldOp = DAG.getNode(N->getOpcode(), LHS.getDebugLoc(), EltType,
+                                   LHSOp, RHSOp);
+      if (FoldOp.getOpcode() != ISD::UNDEF &&
+          FoldOp.getOpcode() != ISD::Constant &&
+          FoldOp.getOpcode() != ISD::ConstantFP)
+        break;
+      Ops.push_back(FoldOp);
+      AddToWorkList(FoldOp.getNode());
     }
 
     if (Ops.size() == LHS.getNumOperands()) {
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index b4c3833..95f4d07 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -56,6 +56,27 @@
 #include "FunctionLoweringInfo.h"
 using namespace llvm;
 
+bool FastISel::hasTrivialKill(const Value *V) const {
+  // Don't consider constants or arguments to have trivial kills.
+  const Instruction *I = dyn_cast<Instruction>(V);
+  if (!I)
+    return false;
+
+  // No-op casts are trivially coalesced by fast-isel.
+  if (const CastInst *Cast = dyn_cast<CastInst>(I))
+    if (Cast->isNoopCast(TD.getIntPtrType(Cast->getContext())) &&
+        !hasTrivialKill(Cast->getOperand(0)))
+      return false;
+
+  // Only instructions with a single use in the same basic block are considered
+  // to have trivial kills.
+  return I->hasOneUse() &&
+         !(I->getOpcode() == Instruction::BitCast ||
+           I->getOpcode() == Instruction::PtrToInt ||
+           I->getOpcode() == Instruction::IntToPtr) &&
+         cast<Instruction>(I->use_begin())->getParent() == I->getParent();
+}
+
 unsigned FastISel::getRegForValue(const Value *V) {
   EVT RealVT = TLI.getValueType(V->getType(), /*AllowUnknown=*/true);
   // Don't handle non-simple values in FastISel.
@@ -78,12 +99,24 @@ unsigned FastISel::getRegForValue(const Value *V) {
   // cache values defined by Instructions across blocks, and other values
   // only locally. This is because Instructions already have the SSA
   // def-dominates-use requirement enforced.
-  if (ValueMap.count(V))
-    return ValueMap[V];
+  DenseMap<const Value *, unsigned>::iterator I = ValueMap.find(V);
+  if (I != ValueMap.end())
+    return I->second;
   unsigned Reg = LocalValueMap[V];
   if (Reg != 0)
     return Reg;
 
+  // In bottom-up mode, just create the virtual register which will be used
+  // to hold the value. It will be materialized later.
+  if (IsBottomUp) {
+    Reg = createResultReg(TLI.getRegClassFor(VT));
+    if (isa<Instruction>(V))
+      ValueMap[V] = Reg;
+    else
+      LocalValueMap[V] = Reg;
+    return Reg;
+  }
+
   return materializeRegForValue(V, VT);
 }
 
@@ -123,7 +156,8 @@ unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) {
         unsigned IntegerReg =
           getRegForValue(ConstantInt::get(V->getContext(), IntVal));
         if (IntegerReg != 0)
-          Reg = FastEmit_r(IntVT.getSimpleVT(), VT, ISD::SINT_TO_FP, IntegerReg);
+          Reg = FastEmit_r(IntVT.getSimpleVT(), VT, ISD::SINT_TO_FP,
+                           IntegerReg, /*Kill=*/false);
       }
     }
   } else if (const Operator *Op = dyn_cast<Operator>(V)) {
@@ -174,25 +208,33 @@ unsigned FastISel::UpdateValueMap(const Value *I, unsigned Reg) {
   else if (Reg != AssignedReg) {
     const TargetRegisterClass *RegClass = MRI.getRegClass(Reg);
     TII.copyRegToReg(*MBB, MBB->end(), AssignedReg,
-                     Reg, RegClass, RegClass);
+                     Reg, RegClass, RegClass, DL);
   }
   return AssignedReg;
 }
 
-unsigned FastISel::getRegForGEPIndex(const Value *Idx) {
+std::pair<unsigned, bool> FastISel::getRegForGEPIndex(const Value *Idx) {
   unsigned IdxN = getRegForValue(Idx);
   if (IdxN == 0)
     // Unhandled operand. Halt "fast" selection and bail.
-    return 0;
+    return std::pair<unsigned, bool>(0, false);
+
+  bool IdxNIsKill = hasTrivialKill(Idx);
 
   // If the index is smaller or larger than intptr_t, truncate or extend it.
   MVT PtrVT = TLI.getPointerTy();
   EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
-  if (IdxVT.bitsLT(PtrVT))
-    IdxN = FastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::SIGN_EXTEND, IdxN);
-  else if (IdxVT.bitsGT(PtrVT))
-    IdxN = FastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::TRUNCATE, IdxN);
-  return IdxN;
+  if (IdxVT.bitsLT(PtrVT)) {
+    IdxN = FastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::SIGN_EXTEND,
+                      IdxN, IdxNIsKill);
+    IdxNIsKill = true;
+  }
+  else if (IdxVT.bitsGT(PtrVT)) {
+    IdxN = FastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::TRUNCATE,
+                      IdxN, IdxNIsKill);
+    IdxNIsKill = true;
+  }
+  return std::pair<unsigned, bool>(IdxN, IdxNIsKill);
 }
 
 /// SelectBinaryOp - Select and emit code for a binary operator instruction,
@@ -224,10 +266,13 @@ bool FastISel::SelectBinaryOp(const User *I, unsigned ISDOpcode) {
     // Unhandled operand. Halt "fast" selection and bail.
     return false;
 
+  bool Op0IsKill = hasTrivialKill(I->getOperand(0));
+
   // Check if the second operand is a constant and handle it appropriately.
   if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
     unsigned ResultReg = FastEmit_ri(VT.getSimpleVT(), VT.getSimpleVT(),
-                                     ISDOpcode, Op0, CI->getZExtValue());
+                                     ISDOpcode, Op0, Op0IsKill,
+                                     CI->getZExtValue());
     if (ResultReg != 0) {
       // We successfully emitted code for the given LLVM Instruction.
       UpdateValueMap(I, ResultReg);
@@ -238,7 +283,7 @@ bool FastISel::SelectBinaryOp(const User *I, unsigned ISDOpcode) {
   // Check if the second operand is a constant float.
   if (ConstantFP *CF = dyn_cast<ConstantFP>(I->getOperand(1))) {
     unsigned ResultReg = FastEmit_rf(VT.getSimpleVT(), VT.getSimpleVT(),
-                                     ISDOpcode, Op0, CF);
+                                     ISDOpcode, Op0, Op0IsKill, CF);
     if (ResultReg != 0) {
       // We successfully emitted code for the given LLVM Instruction.
       UpdateValueMap(I, ResultReg);
@@ -251,9 +296,13 @@ bool FastISel::SelectBinaryOp(const User *I, unsigned ISDOpcode) {
     // Unhandled operand. Halt "fast" selection and bail.
     return false;
 
+  bool Op1IsKill = hasTrivialKill(I->getOperand(1));
+
   // Now we have both operands in registers. Emit the instruction.
   unsigned ResultReg = FastEmit_rr(VT.getSimpleVT(), VT.getSimpleVT(),
-                                   ISDOpcode, Op0, Op1);
+                                   ISDOpcode,
+                                   Op0, Op0IsKill,
+                                   Op1, Op1IsKill);
   if (ResultReg == 0)
     // Target-specific code wasn't able to find a machine opcode for
     // the given ISD opcode and type. Halt "fast" selection and bail.
@@ -270,6 +319,8 @@ bool FastISel::SelectGetElementPtr(const User *I) {
     // Unhandled operand. Halt "fast" selection and bail.
     return false;
 
+  bool NIsKill = hasTrivialKill(I->getOperand(0));
+
   const Type *Ty = I->getOperand(0)->getType();
   MVT VT = TLI.getPointerTy();
   for (GetElementPtrInst::const_op_iterator OI = I->op_begin()+1,
@@ -282,10 +333,11 @@ bool FastISel::SelectGetElementPtr(const User *I) {
         uint64_t Offs = TD.getStructLayout(StTy)->getElementOffset(Field);
         // FIXME: This can be optimized by combining the add with a
         // subsequent one.
-        N = FastEmit_ri_(VT, ISD::ADD, N, Offs, VT);
+        N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, Offs, VT);
         if (N == 0)
           // Unhandled operand. Halt "fast" selection and bail.
           return false;
+        NIsKill = true;
       }
       Ty = StTy->getElementType(Field);
     } else {
@@ -296,27 +348,31 @@ bool FastISel::SelectGetElementPtr(const User *I) {
         if (CI->getZExtValue() == 0) continue;
         uint64_t Offs = 
           TD.getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue();
-        N = FastEmit_ri_(VT, ISD::ADD, N, Offs, VT);
+        N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, Offs, VT);
         if (N == 0)
           // Unhandled operand. Halt "fast" selection and bail.
           return false;
+        NIsKill = true;
         continue;
       }
       
       // N = N + Idx * ElementSize;
       uint64_t ElementSize = TD.getTypeAllocSize(Ty);
-      unsigned IdxN = getRegForGEPIndex(Idx);
+      std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx);
+      unsigned IdxN = Pair.first;
+      bool IdxNIsKill = Pair.second;
       if (IdxN == 0)
         // Unhandled operand. Halt "fast" selection and bail.
         return false;
 
       if (ElementSize != 1) {
-        IdxN = FastEmit_ri_(VT, ISD::MUL, IdxN, ElementSize, VT);
+        IdxN = FastEmit_ri_(VT, ISD::MUL, IdxN, IdxNIsKill, ElementSize, VT);
         if (IdxN == 0)
           // Unhandled operand. Halt "fast" selection and bail.
           return false;
+        IdxNIsKill = true;
       }
-      N = FastEmit_rr(VT, VT, ISD::ADD, N, IdxN);
+      N = FastEmit_rr(VT, VT, ISD::ADD, N, NIsKill, IdxN, IdxNIsKill);
       if (N == 0)
         // Unhandled operand. Halt "fast" selection and bail.
         return false;
@@ -338,7 +394,7 @@ bool FastISel::SelectCall(const User *I) {
   default: break;
   case Intrinsic::dbg_declare: {
     const DbgDeclareInst *DI = cast<DbgDeclareInst>(I);
-    if (!DIDescriptor::ValidDebugInfo(DI->getVariable(), CodeGenOpt::None) ||
+    if (!DIVariable(DI->getVariable()).Verify() ||
         !MF.getMMI().hasDebugInfo())
       return true;
 
@@ -402,7 +458,7 @@ bool FastISel::SelectCall(const User *I) {
       const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
       unsigned ResultReg = createResultReg(RC);
       bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
-                                           Reg, RC, RC);
+                                           Reg, RC, RC, DL);
       assert(InsertedCopy && "Can't copy address registers!");
       InsertedCopy = InsertedCopy;
       UpdateValueMap(I, ResultReg);
@@ -432,17 +488,19 @@ bool FastISel::SelectCall(const User *I) {
       const TargetRegisterClass *RC = TLI.getRegClassFor(SrcVT);
       unsigned ResultReg = createResultReg(RC);
       bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, Reg,
-                                           RC, RC);
+                                           RC, RC, DL);
       assert(InsertedCopy && "Can't copy address registers!");
       InsertedCopy = InsertedCopy;
 
+      bool ResultRegIsKill = hasTrivialKill(I);
+
       // Cast the register to the type of the selector.
       if (SrcVT.bitsGT(MVT::i32))
         ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32, ISD::TRUNCATE,
-                               ResultReg);
+                               ResultReg, ResultRegIsKill);
       else if (SrcVT.bitsLT(MVT::i32))
         ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32,
-                               ISD::SIGN_EXTEND, ResultReg);
+                               ISD::SIGN_EXTEND, ResultReg, ResultRegIsKill);
       if (ResultReg == 0)
         // Unhandled operand. Halt "fast" selection and bail.
         return false;
@@ -490,12 +548,15 @@ bool FastISel::SelectCast(const User *I, unsigned Opcode) {
     // Unhandled operand.  Halt "fast" selection and bail.
     return false;
 
+  bool InputRegIsKill = hasTrivialKill(I->getOperand(0));
+
   // If the operand is i1, arrange for the high bits in the register to be zero.
   if (SrcVT == MVT::i1) {
    SrcVT = TLI.getTypeToTransformTo(I->getContext(), SrcVT);
-   InputReg = FastEmitZExtFromI1(SrcVT.getSimpleVT(), InputReg);
+   InputReg = FastEmitZExtFromI1(SrcVT.getSimpleVT(), InputReg, InputRegIsKill);
    if (!InputReg)
      return false;
+   InputRegIsKill = true;
   }
   // If the result is i1, truncate to the target's type for i1 first.
   if (DstVT == MVT::i1)
@@ -504,7 +565,7 @@ bool FastISel::SelectCast(const User *I, unsigned Opcode) {
   unsigned ResultReg = FastEmit_r(SrcVT.getSimpleVT(),
                                   DstVT.getSimpleVT(),
                                   Opcode,
-                                  InputReg);
+                                  InputReg, InputRegIsKill);
   if (!ResultReg)
     return false;
     
@@ -536,6 +597,8 @@ bool FastISel::SelectBitCast(const User *I) {
   if (Op0 == 0)
     // Unhandled operand. Halt "fast" selection and bail.
     return false;
+
+  bool Op0IsKill = hasTrivialKill(I->getOperand(0));
   
   // First, try to perform the bitcast by inserting a reg-reg copy.
   unsigned ResultReg = 0;
@@ -545,7 +608,7 @@ bool FastISel::SelectBitCast(const User *I) {
     ResultReg = createResultReg(DstClass);
     
     bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
-                                         Op0, DstClass, SrcClass);
+                                         Op0, DstClass, SrcClass, DL);
     if (!InsertedCopy)
       ResultReg = 0;
   }
@@ -553,7 +616,7 @@ bool FastISel::SelectBitCast(const User *I) {
   // If the reg-reg copy failed, select a BIT_CONVERT opcode.
   if (!ResultReg)
     ResultReg = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(),
-                           ISD::BIT_CONVERT, Op0);
+                           ISD::BIT_CONVERT, Op0, Op0IsKill);
   
   if (!ResultReg)
     return false;
@@ -609,10 +672,12 @@ FastISel::SelectFNeg(const User *I) {
   unsigned OpReg = getRegForValue(BinaryOperator::getFNegArgument(I));
   if (OpReg == 0) return false;
 
+  bool OpRegIsKill = hasTrivialKill(I);
+
   // If the target has ISD::FNEG, use it.
   EVT VT = TLI.getValueType(I->getType());
   unsigned ResultReg = FastEmit_r(VT.getSimpleVT(), VT.getSimpleVT(),
-                                  ISD::FNEG, OpReg);
+                                  ISD::FNEG, OpReg, OpRegIsKill);
   if (ResultReg != 0) {
     UpdateValueMap(I, ResultReg);
     return true;
@@ -626,18 +691,19 @@ FastISel::SelectFNeg(const User *I) {
     return false;
 
   unsigned IntReg = FastEmit_r(VT.getSimpleVT(), IntVT.getSimpleVT(),
-                               ISD::BIT_CONVERT, OpReg);
+                               ISD::BIT_CONVERT, OpReg, OpRegIsKill);
   if (IntReg == 0)
     return false;
 
-  unsigned IntResultReg = FastEmit_ri_(IntVT.getSimpleVT(), ISD::XOR, IntReg,
+  unsigned IntResultReg = FastEmit_ri_(IntVT.getSimpleVT(), ISD::XOR,
+                                       IntReg, /*Kill=*/true,
                                        UINT64_C(1) << (VT.getSizeInBits()-1),
                                        IntVT.getSimpleVT());
   if (IntResultReg == 0)
     return false;
 
   ResultReg = FastEmit_r(IntVT.getSimpleVT(), VT.getSimpleVT(),
-                         ISD::BIT_CONVERT, IntResultReg);
+                         ISD::BIT_CONVERT, IntResultReg, /*Kill=*/true);
   if (ResultReg == 0)
     return false;
 
@@ -782,7 +848,8 @@ FastISel::FastISel(MachineFunction &mf,
     TM(MF.getTarget()),
     TD(*TM.getTargetData()),
     TII(*TM.getInstrInfo()),
-    TLI(*TM.getTargetLowering()) {
+    TLI(*TM.getTargetLowering()),
+    IsBottomUp(false) {
 }
 
 FastISel::~FastISel() {}
@@ -793,13 +860,15 @@ unsigned FastISel::FastEmit_(MVT, MVT,
 }
 
 unsigned FastISel::FastEmit_r(MVT, MVT,
-                              unsigned, unsigned /*Op0*/) {
+                              unsigned,
+                              unsigned /*Op0*/, bool /*Op0IsKill*/) {
   return 0;
 }
 
 unsigned FastISel::FastEmit_rr(MVT, MVT, 
-                               unsigned, unsigned /*Op0*/,
-                               unsigned /*Op0*/) {
+                               unsigned,
+                               unsigned /*Op0*/, bool /*Op0IsKill*/,
+                               unsigned /*Op1*/, bool /*Op1IsKill*/) {
   return 0;
 }
 
@@ -813,20 +882,23 @@ unsigned FastISel::FastEmit_f(MVT, MVT,
 }
 
 unsigned FastISel::FastEmit_ri(MVT, MVT,
-                               unsigned, unsigned /*Op0*/,
+                               unsigned,
+                               unsigned /*Op0*/, bool /*Op0IsKill*/,
                                uint64_t /*Imm*/) {
   return 0;
 }
 
 unsigned FastISel::FastEmit_rf(MVT, MVT,
-                               unsigned, unsigned /*Op0*/,
+                               unsigned,
+                               unsigned /*Op0*/, bool /*Op0IsKill*/,
                                const ConstantFP * /*FPImm*/) {
   return 0;
 }
 
 unsigned FastISel::FastEmit_rri(MVT, MVT,
                                 unsigned,
-                                unsigned /*Op0*/, unsigned /*Op1*/,
+                                unsigned /*Op0*/, bool /*Op0IsKill*/,
+                                unsigned /*Op1*/, bool /*Op1IsKill*/,
                                 uint64_t /*Imm*/) {
   return 0;
 }
@@ -836,16 +908,18 @@ unsigned FastISel::FastEmit_rri(MVT, MVT,
 /// If that fails, it materializes the immediate into a register and try
 /// FastEmit_rr instead.
 unsigned FastISel::FastEmit_ri_(MVT VT, unsigned Opcode,
-                                unsigned Op0, uint64_t Imm,
-                                MVT ImmType) {
+                                unsigned Op0, bool Op0IsKill,
+                                uint64_t Imm, MVT ImmType) {
   // First check if immediate type is legal. If not, we can't use the ri form.
-  unsigned ResultReg = FastEmit_ri(VT, VT, Opcode, Op0, Imm);
+  unsigned ResultReg = FastEmit_ri(VT, VT, Opcode, Op0, Op0IsKill, Imm);
   if (ResultReg != 0)
     return ResultReg;
   unsigned MaterialReg = FastEmit_i(ImmType, ImmType, ISD::Constant, Imm);
   if (MaterialReg == 0)
     return 0;
-  return FastEmit_rr(VT, VT, Opcode, Op0, MaterialReg);
+  return FastEmit_rr(VT, VT, Opcode,
+                     Op0, Op0IsKill,
+                     MaterialReg, /*Kill=*/true);
 }
 
 /// FastEmit_rf_ - This method is a wrapper of FastEmit_ri. It first tries
@@ -853,10 +927,10 @@ unsigned FastISel::FastEmit_ri_(MVT VT, unsigned Opcode,
 /// FastEmit_rf. If that fails, it materializes the immediate into a register
 /// and try FastEmit_rr instead.
 unsigned FastISel::FastEmit_rf_(MVT VT, unsigned Opcode,
-                                unsigned Op0, const ConstantFP *FPImm,
-                                MVT ImmType) {
+                                unsigned Op0, bool Op0IsKill,
+                                const ConstantFP *FPImm, MVT ImmType) {
   // First check if immediate type is legal. If not, we can't use the rf form.
-  unsigned ResultReg = FastEmit_rf(VT, VT, Opcode, Op0, FPImm);
+  unsigned ResultReg = FastEmit_rf(VT, VT, Opcode, Op0, Op0IsKill, FPImm);
   if (ResultReg != 0)
     return ResultReg;
 
@@ -886,11 +960,13 @@ unsigned FastISel::FastEmit_rf_(MVT VT, unsigned Opcode,
     if (IntegerReg == 0)
       return 0;
     MaterialReg = FastEmit_r(IntVT.getSimpleVT(), VT,
-                             ISD::SINT_TO_FP, IntegerReg);
+                             ISD::SINT_TO_FP, IntegerReg, /*Kill=*/true);
     if (MaterialReg == 0)
       return 0;
   }
-  return FastEmit_rr(VT, VT, Opcode, Op0, MaterialReg);
+  return FastEmit_rr(VT, VT, Opcode,
+                     Op0, Op0IsKill,
+                     MaterialReg, /*Kill=*/true);
 }
 
 unsigned FastISel::createResultReg(const TargetRegisterClass* RC) {
@@ -908,16 +984,16 @@ unsigned FastISel::FastEmitInst_(unsigned MachineInstOpcode,
 
 unsigned FastISel::FastEmitInst_r(unsigned MachineInstOpcode,
                                   const TargetRegisterClass *RC,
-                                  unsigned Op0) {
+                                  unsigned Op0, bool Op0IsKill) {
   unsigned ResultReg = createResultReg(RC);
   const TargetInstrDesc &II = TII.get(MachineInstOpcode);
 
   if (II.getNumDefs() >= 1)
-    BuildMI(MBB, DL, II, ResultReg).addReg(Op0);
+    BuildMI(MBB, DL, II, ResultReg).addReg(Op0, Op0IsKill * RegState::Kill);
   else {
-    BuildMI(MBB, DL, II).addReg(Op0);
+    BuildMI(MBB, DL, II).addReg(Op0, Op0IsKill * RegState::Kill);
     bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
-                                         II.ImplicitDefs[0], RC, RC);
+                                         II.ImplicitDefs[0], RC, RC, DL);
     if (!InsertedCopy)
       ResultReg = 0;
   }
@@ -927,16 +1003,21 @@ unsigned FastISel::FastEmitInst_r(unsigned MachineInstOpcode,
 
 unsigned FastISel::FastEmitInst_rr(unsigned MachineInstOpcode,
                                    const TargetRegisterClass *RC,
-                                   unsigned Op0, unsigned Op1) {
+                                   unsigned Op0, bool Op0IsKill,
+                                   unsigned Op1, bool Op1IsKill) {
   unsigned ResultReg = createResultReg(RC);
   const TargetInstrDesc &II = TII.get(MachineInstOpcode);
 
   if (II.getNumDefs() >= 1)
-    BuildMI(MBB, DL, II, ResultReg).addReg(Op0).addReg(Op1);
+    BuildMI(MBB, DL, II, ResultReg)
+      .addReg(Op0, Op0IsKill * RegState::Kill)
+      .addReg(Op1, Op1IsKill * RegState::Kill);
   else {
-    BuildMI(MBB, DL, II).addReg(Op0).addReg(Op1);
+    BuildMI(MBB, DL, II)
+      .addReg(Op0, Op0IsKill * RegState::Kill)
+      .addReg(Op1, Op1IsKill * RegState::Kill);
     bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
-                                         II.ImplicitDefs[0], RC, RC);
+                                         II.ImplicitDefs[0], RC, RC, DL);
     if (!InsertedCopy)
       ResultReg = 0;
   }
@@ -945,16 +1026,21 @@ unsigned FastISel::FastEmitInst_rr(unsigned MachineInstOpcode,
 
 unsigned FastISel::FastEmitInst_ri(unsigned MachineInstOpcode,
                                    const TargetRegisterClass *RC,
-                                   unsigned Op0, uint64_t Imm) {
+                                   unsigned Op0, bool Op0IsKill,
+                                   uint64_t Imm) {
   unsigned ResultReg = createResultReg(RC);
   const TargetInstrDesc &II = TII.get(MachineInstOpcode);
 
   if (II.getNumDefs() >= 1)
-    BuildMI(MBB, DL, II, ResultReg).addReg(Op0).addImm(Imm);
+    BuildMI(MBB, DL, II, ResultReg)
+      .addReg(Op0, Op0IsKill * RegState::Kill)
+      .addImm(Imm);
   else {
-    BuildMI(MBB, DL, II).addReg(Op0).addImm(Imm);
+    BuildMI(MBB, DL, II)
+      .addReg(Op0, Op0IsKill * RegState::Kill)
+      .addImm(Imm);
     bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
-                                         II.ImplicitDefs[0], RC, RC);
+                                         II.ImplicitDefs[0], RC, RC, DL);
     if (!InsertedCopy)
       ResultReg = 0;
   }
@@ -963,16 +1049,21 @@ unsigned FastISel::FastEmitInst_ri(unsigned MachineInstOpcode,
 
 unsigned FastISel::FastEmitInst_rf(unsigned MachineInstOpcode,
                                    const TargetRegisterClass *RC,
-                                   unsigned Op0, const ConstantFP *FPImm) {
+                                   unsigned Op0, bool Op0IsKill,
+                                   const ConstantFP *FPImm) {
   unsigned ResultReg = createResultReg(RC);
   const TargetInstrDesc &II = TII.get(MachineInstOpcode);
 
   if (II.getNumDefs() >= 1)
-    BuildMI(MBB, DL, II, ResultReg).addReg(Op0).addFPImm(FPImm);
+    BuildMI(MBB, DL, II, ResultReg)
+      .addReg(Op0, Op0IsKill * RegState::Kill)
+      .addFPImm(FPImm);
   else {
-    BuildMI(MBB, DL, II).addReg(Op0).addFPImm(FPImm);
+    BuildMI(MBB, DL, II)
+      .addReg(Op0, Op0IsKill * RegState::Kill)
+      .addFPImm(FPImm);
     bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
-                                         II.ImplicitDefs[0], RC, RC);
+                                         II.ImplicitDefs[0], RC, RC, DL);
     if (!InsertedCopy)
       ResultReg = 0;
   }
@@ -981,16 +1072,24 @@ unsigned FastISel::FastEmitInst_rf(unsigned MachineInstOpcode,
 
 unsigned FastISel::FastEmitInst_rri(unsigned MachineInstOpcode,
                                     const TargetRegisterClass *RC,
-                                    unsigned Op0, unsigned Op1, uint64_t Imm) {
+                                    unsigned Op0, bool Op0IsKill,
+                                    unsigned Op1, bool Op1IsKill,
+                                    uint64_t Imm) {
   unsigned ResultReg = createResultReg(RC);
   const TargetInstrDesc &II = TII.get(MachineInstOpcode);
 
   if (II.getNumDefs() >= 1)
-    BuildMI(MBB, DL, II, ResultReg).addReg(Op0).addReg(Op1).addImm(Imm);
+    BuildMI(MBB, DL, II, ResultReg)
+      .addReg(Op0, Op0IsKill * RegState::Kill)
+      .addReg(Op1, Op1IsKill * RegState::Kill)
+      .addImm(Imm);
   else {
-    BuildMI(MBB, DL, II).addReg(Op0).addReg(Op1).addImm(Imm);
+    BuildMI(MBB, DL, II)
+      .addReg(Op0, Op0IsKill * RegState::Kill)
+      .addReg(Op1, Op1IsKill * RegState::Kill)
+      .addImm(Imm);
     bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
-                                         II.ImplicitDefs[0], RC, RC);
+                                         II.ImplicitDefs[0], RC, RC, DL);
     if (!InsertedCopy)
       ResultReg = 0;
   }
@@ -1008,7 +1107,7 @@ unsigned FastISel::FastEmitInst_i(unsigned MachineInstOpcode,
   else {
     BuildMI(MBB, DL, II).addImm(Imm);
     bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
-                                         II.ImplicitDefs[0], RC, RC);
+                                         II.ImplicitDefs[0], RC, RC, DL);
     if (!InsertedCopy)
       ResultReg = 0;
   }
@@ -1016,18 +1115,23 @@ unsigned FastISel::FastEmitInst_i(unsigned MachineInstOpcode,
 }
 
 unsigned FastISel::FastEmitInst_extractsubreg(MVT RetVT,
-                                              unsigned Op0, uint32_t Idx) {
+                                              unsigned Op0, bool Op0IsKill,
+                                              uint32_t Idx) {
   const TargetRegisterClass* RC = MRI.getRegClass(Op0);
   
   unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
   const TargetInstrDesc &II = TII.get(TargetOpcode::EXTRACT_SUBREG);
   
   if (II.getNumDefs() >= 1)
-    BuildMI(MBB, DL, II, ResultReg).addReg(Op0).addImm(Idx);
+    BuildMI(MBB, DL, II, ResultReg)
+      .addReg(Op0, Op0IsKill * RegState::Kill)
+      .addImm(Idx);
   else {
-    BuildMI(MBB, DL, II).addReg(Op0).addImm(Idx);
+    BuildMI(MBB, DL, II)
+      .addReg(Op0, Op0IsKill * RegState::Kill)
+      .addImm(Idx);
     bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
-                                         II.ImplicitDefs[0], RC, RC);
+                                         II.ImplicitDefs[0], RC, RC, DL);
     if (!InsertedCopy)
       ResultReg = 0;
   }
@@ -1036,8 +1140,8 @@ unsigned FastISel::FastEmitInst_extractsubreg(MVT RetVT,
 
 /// FastEmitZExtFromI1 - Emit MachineInstrs to compute the value of Op
 /// with all but the least significant bit set to zero.
-unsigned FastISel::FastEmitZExtFromI1(MVT VT, unsigned Op) {
-  return FastEmit_ri(VT, VT, ISD::AND, Op, 1);
+unsigned FastISel::FastEmitZExtFromI1(MVT VT, unsigned Op0, bool Op0IsKill) {
+  return FastEmit_ri(VT, VT, ISD::AND, Op0, Op0IsKill, 1);
 }
 
 /// HandlePHINodesInSuccessorBlocks - Handle PHI nodes in successor blocks.
@@ -1070,6 +1174,7 @@ bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
     // emitted yet.
     for (BasicBlock::const_iterator I = SuccBB->begin();
          const PHINode *PN = dyn_cast<PHINode>(I); ++I) {
+
       // Ignore dead phi's.
       if (PN->use_empty()) continue;
 
@@ -1092,12 +1197,19 @@ bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
 
       const Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB);
 
+      // Set the DebugLoc for the copy. Prefer the location of the operand
+      // if there is one; use the location of the PHI otherwise.
+      DL = PN->getDebugLoc();
+      if (const Instruction *Inst = dyn_cast<Instruction>(PHIOp))
+        DL = Inst->getDebugLoc();
+
       unsigned Reg = getRegForValue(PHIOp);
       if (Reg == 0) {
         PHINodesToUpdate.resize(OrigNumPHINodesToUpdate);
         return false;
       }
       PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg));
+      DL = DebugLoc();
     }
   }
 
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index c5dae82..16eb8a7 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -143,7 +143,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
     // Create the reg, emit the copy.
     VRBase = MRI->createVirtualRegister(DstRC);
     bool Emitted = TII->copyRegToReg(*MBB, InsertPos, VRBase, SrcReg,
-                                     DstRC, SrcRC);
+                                     DstRC, SrcRC, Node->getDebugLoc());
 
     assert(Emitted && "Unable to issue a copy instruction!\n");
     (void) Emitted;
@@ -265,7 +265,7 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op,
                                  unsigned IIOpNum,
                                  const TargetInstrDesc *II,
                                  DenseMap<SDValue, unsigned> &VRBaseMap,
-                                 bool IsDebug) {
+                                 bool IsDebug, bool IsClone, bool IsCloned) {
   assert(Op.getValueType() != MVT::Other &&
          Op.getValueType() != MVT::Flag &&
          "Chain and flag operands should occur at end of operand list!");
@@ -289,7 +289,7 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op,
     if (DstRC && SrcRC != DstRC && !SrcRC->hasSuperClass(DstRC)) {
       unsigned NewVReg = MRI->createVirtualRegister(DstRC);
       bool Emitted = TII->copyRegToReg(*MBB, InsertPos, NewVReg, VReg,
-                                       DstRC, SrcRC);
+                                       DstRC, SrcRC, Op.getNode()->getDebugLoc());
       assert(Emitted && "Unable to issue a copy instruction!\n");
       (void) Emitted;
       VReg = NewVReg;
@@ -297,15 +297,25 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op,
   }
 
   // If this value has only one use, that use is a kill. This is a
-  // conservative approximation. Tied operands are never killed, so we need
-  // to check that. And that means we need to determine the index of the
-  // operand.
-  unsigned Idx = MI->getNumOperands();
-  while (Idx > 0 &&
-         MI->getOperand(Idx-1).isReg() && MI->getOperand(Idx-1).isImplicit())
-    --Idx;
-  bool isTied = MI->getDesc().getOperandConstraint(Idx, TOI::TIED_TO) != -1;
-  bool isKill = Op.hasOneUse() && !isTied && !IsDebug;
+  // conservative approximation. InstrEmitter does trivial coalescing
+  // with CopyFromReg nodes, so don't emit kill flags for them.
+  // Avoid kill flags on Schedule cloned nodes, since there will be
+  // multiple uses.
+  // Tied operands are never killed, so we need to check that. And that
+  // means we need to determine the index of the operand.
+  bool isKill = Op.hasOneUse() &&
+                Op.getNode()->getOpcode() != ISD::CopyFromReg &&
+                !IsDebug &&
+                !(IsClone || IsCloned);
+  if (isKill) {
+    unsigned Idx = MI->getNumOperands();
+    while (Idx > 0 &&
+           MI->getOperand(Idx-1).isReg() && MI->getOperand(Idx-1).isImplicit())
+      --Idx;
+    bool isTied = MI->getDesc().getOperandConstraint(Idx, TOI::TIED_TO) != -1;
+    if (isTied)
+      isKill = false;
+  }
 
   MI->addOperand(MachineOperand::CreateReg(VReg, isOptDef,
                                            false/*isImp*/, isKill,
@@ -322,9 +332,10 @@ void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op,
                               unsigned IIOpNum,
                               const TargetInstrDesc *II,
                               DenseMap<SDValue, unsigned> &VRBaseMap,
-                              bool IsDebug) {
+                              bool IsDebug, bool IsClone, bool IsCloned) {
   if (Op.isMachineOpcode()) {
-    AddRegisterOperand(MI, Op, IIOpNum, II, VRBaseMap, IsDebug);
+    AddRegisterOperand(MI, Op, IIOpNum, II, VRBaseMap,
+                       IsDebug, IsClone, IsCloned);
   } else if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
     MI->addOperand(MachineOperand::CreateImm(C->getSExtValue()));
   } else if (ConstantFPSDNode *F = dyn_cast<ConstantFPSDNode>(Op)) {
@@ -373,7 +384,8 @@ void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op,
     assert(Op.getValueType() != MVT::Other &&
            Op.getValueType() != MVT::Flag &&
            "Chain and flag operands should occur at end of operand list!");
-    AddRegisterOperand(MI, Op, IIOpNum, II, VRBaseMap, IsDebug);
+    AddRegisterOperand(MI, Op, IIOpNum, II, VRBaseMap,
+                       IsDebug, IsClone, IsCloned);
   }
 }
 
@@ -395,7 +407,8 @@ getSuperRegisterRegClass(const TargetRegisterClass *TRC,
 /// EmitSubregNode - Generate machine code for subreg nodes.
 ///
 void InstrEmitter::EmitSubregNode(SDNode *Node, 
-                                  DenseMap<SDValue, unsigned> &VRBaseMap){
+                                  DenseMap<SDValue, unsigned> &VRBaseMap,
+                                  bool IsClone, bool IsCloned) {
   unsigned VRBase = 0;
   unsigned Opc = Node->getMachineOpcode();
   
@@ -439,7 +452,8 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
 
     // Add def, source, and subreg index
     MI->addOperand(MachineOperand::CreateReg(VRBase, true));
-    AddOperand(MI, Node->getOperand(0), 0, 0, VRBaseMap);
+    AddOperand(MI, Node->getOperand(0), 0, 0, VRBaseMap, /*IsDebug=*/false,
+               IsClone, IsCloned);
     MI->addOperand(MachineOperand::CreateImm(SubIdx));
     MBB->insert(InsertPos, MI);
   } else if (Opc == TargetOpcode::INSERT_SUBREG ||
@@ -473,9 +487,11 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
       const ConstantSDNode *SD = cast<ConstantSDNode>(N0);
       MI->addOperand(MachineOperand::CreateImm(SD->getZExtValue()));
     } else
-      AddOperand(MI, N0, 0, 0, VRBaseMap);
+      AddOperand(MI, N0, 0, 0, VRBaseMap, /*IsDebug=*/false,
+                 IsClone, IsCloned);
     // Add the subregster being inserted
-    AddOperand(MI, N1, 0, 0, VRBaseMap);
+    AddOperand(MI, N1, 0, 0, VRBaseMap, /*IsDebug=*/false,
+               IsClone, IsCloned);
     MI->addOperand(MachineOperand::CreateImm(SubIdx));
     MBB->insert(InsertPos, MI);
   } else
@@ -503,7 +519,7 @@ InstrEmitter::EmitCopyToRegClassNode(SDNode *Node,
   // Create the new VReg in the destination class and emit a copy.
   unsigned NewVReg = MRI->createVirtualRegister(DstRC);
   bool Emitted = TII->copyRegToReg(*MBB, InsertPos, NewVReg, VReg,
-                                   DstRC, SrcRC);
+                                   DstRC, SrcRC, Node->getDebugLoc());
   assert(Emitted &&
          "Unable to issue a copy instruction for a COPY_TO_REGCLASS node!\n");
   (void) Emitted;
@@ -517,7 +533,8 @@ InstrEmitter::EmitCopyToRegClassNode(SDNode *Node,
 /// EmitRegSequence - Generate machine code for REG_SEQUENCE nodes.
 ///
 void InstrEmitter::EmitRegSequence(SDNode *Node,
-                                  DenseMap<SDValue, unsigned> &VRBaseMap) {
+                                  DenseMap<SDValue, unsigned> &VRBaseMap,
+                                  bool IsClone, bool IsCloned) {
   const TargetRegisterClass *RC = TLI->getRegClassFor(Node->getValueType(0));
   unsigned NewVReg = MRI->createVirtualRegister(RC);
   MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(),
@@ -528,17 +545,21 @@ void InstrEmitter::EmitRegSequence(SDNode *Node,
   const TargetInstrDesc &II = TII->get(TargetOpcode::REG_SEQUENCE);
   for (unsigned i = 0; i != NumOps; ++i) {
     SDValue Op = Node->getOperand(i);
-#ifndef NDEBUG
     if (i & 1) {
       unsigned SubIdx = cast<ConstantSDNode>(Op)->getZExtValue();
       unsigned SubReg = getVR(Node->getOperand(i-1), VRBaseMap);
-    const TargetRegisterClass *TRC = MRI->getRegClass(SubReg);
-    const TargetRegisterClass *SRC =
-      getSuperRegisterRegClass(TRC, SubIdx, Node->getValueType(0));
-    assert(SRC == RC && "Invalid subregister index in REG_SEQUENCE");
+      const TargetRegisterClass *TRC = MRI->getRegClass(SubReg);
+      const TargetRegisterClass *SRC =
+        TRI->getMatchingSuperRegClass(RC, TRC, SubIdx);
+      if (!SRC)
+        llvm_unreachable("Invalid subregister index in REG_SEQUENCE");
+      if (SRC != RC) {
+        MRI->setRegClass(NewVReg, SRC);
+        RC = SRC;
+      }
     }
-#endif
-    AddOperand(MI, Op, i+1, &II, VRBaseMap);
+    AddOperand(MI, Op, i+1, &II, VRBaseMap, /*IsDebug=*/false,
+               IsClone, IsCloned);
   }
 
   MBB->insert(InsertPos, MI);
@@ -579,11 +600,17 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD,
       MIB.addReg(0U);       // undef
     else
       AddOperand(&*MIB, Op, (*MIB).getNumOperands(), &II, VRBaseMap,
-                 true /*IsDebug*/);
+                 /*IsDebug=*/true, /*IsClone=*/false, /*IsCloned=*/false);
   } else if (SD->getKind() == SDDbgValue::CONST) {
     const Value *V = SD->getConst();
     if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
-      MIB.addImm(CI->getSExtValue());
+      // FIXME: SDDbgValues aren't updated with legalization, so it's possible
+      // to have i128 values in them at this point. As a crude workaround, just
+      // drop the debug info if this happens.
+      if (!CI->getValue().isSignedIntN(64))
+        MIB.addReg(0U);
+      else
+        MIB.addImm(CI->getSExtValue());
     } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) {
       MIB.addFPImm(CF);
     } else {
@@ -612,7 +639,7 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
   if (Opc == TargetOpcode::EXTRACT_SUBREG || 
       Opc == TargetOpcode::INSERT_SUBREG ||
       Opc == TargetOpcode::SUBREG_TO_REG) {
-    EmitSubregNode(Node, VRBaseMap);
+    EmitSubregNode(Node, VRBaseMap, IsClone, IsCloned);
     return;
   }
 
@@ -624,7 +651,7 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
 
   // Handle REG_SEQUENCE specially.
   if (Opc == TargetOpcode::REG_SEQUENCE) {
-    EmitRegSequence(Node, VRBaseMap);
+    EmitRegSequence(Node, VRBaseMap, IsClone, IsCloned);
     return;
   }
 
@@ -663,7 +690,7 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
   unsigned NumSkip = HasOptPRefs ? II.getNumDefs() - NumResults : 0;
   for (unsigned i = NumSkip; i != NodeOperands; ++i)
     AddOperand(MI, Node->getOperand(i), i-NumSkip+II.getNumDefs(), &II,
-               VRBaseMap);
+               VRBaseMap, /*IsDebug=*/false, IsClone, IsCloned);
 
   // Transfer all of the memory reference descriptions of this instruction.
   MI->setMemRefs(cast<MachineSDNode>(Node)->memoperands_begin(),
@@ -749,7 +776,7 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
                                             Node->getOperand(1).getValueType());
 
     bool Emitted = TII->copyRegToReg(*MBB, InsertPos, DestReg, SrcReg,
-                                     DstTRC, SrcTRC);
+                                     DstTRC, SrcTRC, Node->getDebugLoc());
     assert(Emitted && "Unable to issue a copy instruction!\n");
     (void) Emitted;
     break;
@@ -810,7 +837,8 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
         // The addressing mode has been selected, just add all of the
         // operands to the machine instruction.
         for (; NumVals; --NumVals, ++i)
-          AddOperand(MI, Node->getOperand(i), 0, 0, VRBaseMap);
+          AddOperand(MI, Node->getOperand(i), 0, 0, VRBaseMap,
+                     /*IsDebug=*/false, IsClone, IsCloned);
         break;
       }
     }
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.h b/lib/CodeGen/SelectionDAG/InstrEmitter.h
index c7e7c71..02c044c 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.h
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.h
@@ -65,7 +65,7 @@ class InstrEmitter {
                           unsigned IIOpNum,
                           const TargetInstrDesc *II,
                           DenseMap<SDValue, unsigned> &VRBaseMap,
-                          bool IsDebug = false);
+                          bool IsDebug, bool IsClone, bool IsCloned);
 
   /// AddOperand - Add the specified operand to the specified machine instr.  II
   /// specifies the instruction information for the node, and IIOpNum is the
@@ -75,11 +75,12 @@ class InstrEmitter {
                   unsigned IIOpNum,
                   const TargetInstrDesc *II,
                   DenseMap<SDValue, unsigned> &VRBaseMap,
-                  bool IsDebug = false);
+                  bool IsDebug, bool IsClone, bool IsCloned);
 
   /// EmitSubregNode - Generate machine code for subreg nodes.
   ///
-  void EmitSubregNode(SDNode *Node, DenseMap<SDValue, unsigned> &VRBaseMap);
+  void EmitSubregNode(SDNode *Node, DenseMap<SDValue, unsigned> &VRBaseMap,
+                      bool IsClone, bool IsCloned);
 
   /// EmitCopyToRegClassNode - Generate machine code for COPY_TO_REGCLASS nodes.
   /// COPY_TO_REGCLASS is just a normal copy, except that the destination
@@ -90,7 +91,8 @@ class InstrEmitter {
 
   /// EmitRegSequence - Generate machine code for REG_SEQUENCE nodes.
   ///
-  void EmitRegSequence(SDNode *Node, DenseMap<SDValue, unsigned> &VRBaseMap);
+  void EmitRegSequence(SDNode *Node, DenseMap<SDValue, unsigned> &VRBaseMap,
+                       bool IsClone, bool IsCloned);
 public:
   /// CountResults - The results of target nodes have register or immediate
   /// operands first, then an optional chain, and optional flag operands
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index bedfa57..62a37a5 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -23,7 +23,6 @@
 #include "llvm/Target/TargetData.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetSubtarget.h"
 #include "llvm/CallingConv.h"
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
@@ -2027,6 +2026,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
     return Result;
   }
   assert(!isSigned && "Legalize cannot Expand SINT_TO_FP for i64 yet");
+  // Code below here assumes !isSigned without checking again.
 
   // Implementation of unsigned i64 to f64 following the algorithm in
   // __floatundidf in compiler_rt. This implementation has the advantage
@@ -2052,6 +2052,41 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
     return DAG.getNode(ISD::FADD, dl, MVT::f64, LoFlt, HiSub);
   }
 
+  // Implementation of unsigned i64 to f32.  This implementation has the
+  // advantage of performing rounding correctly.
+  // TODO: Generalize this for use with other types.
+  if (Op0.getValueType() == MVT::i64 && DestVT == MVT::f32) {
+    EVT SHVT = TLI.getShiftAmountTy();
+
+    SDValue And = DAG.getNode(ISD::AND, dl, MVT::i64, Op0, 
+         DAG.getConstant(UINT64_C(0xfffffffffffff800), MVT::i64));
+    SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i64, And,
+         DAG.getConstant(UINT64_C(0x800), MVT::i64));
+    SDValue And2 = DAG.getNode(ISD::AND, dl, MVT::i64, Op0, 
+         DAG.getConstant(UINT64_C(0x7ff), MVT::i64));
+    SDValue Ne = DAG.getSetCC(dl, TLI.getSetCCResultType(MVT::i64),
+                   And2, DAG.getConstant(UINT64_C(0), MVT::i64), ISD::SETNE);
+    SDValue Sel = DAG.getNode(ISD::SELECT, dl, MVT::i64, Ne, Or, Op0);
+    SDValue Ge = DAG.getSetCC(dl, TLI.getSetCCResultType(MVT::i64),
+                   Op0, DAG.getConstant(UINT64_C(0x0020000000000000), MVT::i64),
+                    ISD::SETUGE);
+    SDValue Sel2 = DAG.getNode(ISD::SELECT, dl, MVT::i64, Ge, Sel, Op0);
+
+    SDValue Sh = DAG.getNode(ISD::SRL, dl, MVT::i64, Sel2,
+                             DAG.getConstant(32, SHVT));
+    SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Sh);
+    SDValue Fcvt = DAG.getNode(ISD::UINT_TO_FP, dl, MVT::f64, Trunc);
+    SDValue TwoP32 =
+      DAG.getConstantFP(BitsToDouble(UINT64_C(0x41f0000000000000)), MVT::f64);
+    SDValue Fmul = DAG.getNode(ISD::FMUL, dl, MVT::f64, TwoP32, Fcvt);
+    SDValue Lo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Sel2);
+    SDValue Fcvt2 = DAG.getNode(ISD::UINT_TO_FP, dl, MVT::f64, Lo);
+    SDValue Fadd = DAG.getNode(ISD::FADD, dl, MVT::f64, Fmul, Fcvt2);
+    return DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Fadd,
+                       DAG.getIntPtrConstant(0));
+
+  }
+
   SDValue Tmp1 = DAG.getNode(ISD::SINT_TO_FP, dl, DestVT, Op0);
 
   SDValue SignSet = DAG.getSetCC(dl, TLI.getSetCCResultType(Op0.getValueType()),
@@ -2488,6 +2523,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
 
     EVT VT = Node->getValueType(0);
     EVT EltVT = VT.getVectorElementType();
+    if (getTypeAction(EltVT) == Promote)
+      EltVT = TLI.getTypeToTransformTo(*DAG.getContext(), EltVT);
     unsigned NumElems = VT.getVectorNumElements();
     SmallVector<SDValue, 8> Ops;
     for (unsigned i = 0; i != NumElems; ++i) {
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 548454c..8b382bc 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -2314,13 +2314,29 @@ SDValue DAGTypeLegalizer::ExpandIntOp_TRUNCATE(SDNode *N) {
   return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), N->getValueType(0), InL);
 }
 
+static const fltSemantics *EVTToAPFloatSemantics(EVT VT) {
+  switch (VT.getSimpleVT().SimpleTy) {
+  default: llvm_unreachable("Unknown FP format");
+  case MVT::f32:     return &APFloat::IEEEsingle;
+  case MVT::f64:     return &APFloat::IEEEdouble;
+  case MVT::f80:     return &APFloat::x87DoubleExtended;
+  case MVT::f128:    return &APFloat::IEEEquad;
+  case MVT::ppcf128: return &APFloat::PPCDoubleDouble;
+  }
+}
+
 SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) {
   SDValue Op = N->getOperand(0);
   EVT SrcVT = Op.getValueType();
   EVT DstVT = N->getValueType(0);
   DebugLoc dl = N->getDebugLoc();
 
-  if (TLI.getOperationAction(ISD::SINT_TO_FP, SrcVT) == TargetLowering::Custom){
+  // The following optimization is valid only if every value in SrcVT (when
+  // treated as signed) is representable in DstVT.  Check that the mantissa
+  // size of DstVT is >= than the number of bits in SrcVT -1.
+  const fltSemantics *sem = EVTToAPFloatSemantics(DstVT);
+  if (APFloat::semanticsPrecision(*sem) >= SrcVT.getSizeInBits()-1 &&
+      TLI.getOperationAction(ISD::SINT_TO_FP, SrcVT) == TargetLowering::Custom){
     // Do a signed conversion then adjust the result.
     SDValue SignedConv = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Op);
     SignedConv = TLI.LowerOperation(SignedConv, DAG);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index d60ad60..c665963 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -32,7 +32,7 @@ namespace llvm {
 /// involves promoting small sizes to large sizes or splitting up large values
 /// into small values.
 ///
-class VISIBILITY_HIDDEN DAGTypeLegalizer {
+class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   const TargetLowering &TLI;
   SelectionDAG &DAG;
 public:
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp
index b92a672..56f5ded 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp
@@ -30,7 +30,6 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/PriorityQueue.h"
 #include "llvm/ADT/Statistic.h"
 #include <climits>
 using namespace llvm;
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index da02850..820ba66 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -24,7 +24,6 @@
 #include "llvm/Target/TargetData.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/ADT/PriorityQueue.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/STLExtras.h"
@@ -53,6 +52,12 @@ static RegisterScheduler
                          "order when possible",
                          createSourceListDAGScheduler);
 
+static RegisterScheduler
+  hybridListDAGScheduler("list-hybrid",
+                         "Bottom-up rr list scheduling which avoid stalls for "
+                         "long latency instructions",
+                         createHybridListDAGScheduler);
+
 namespace {
 //===----------------------------------------------------------------------===//
 /// ScheduleDAGRRList - The actual register reduction list scheduler
@@ -64,6 +69,10 @@ private:
   /// it is top-down.
   bool isBottomUp;
 
+  /// NeedLatency - True if the scheduler will make use of latency information.
+  ///
+  bool NeedLatency;
+
   /// AvailableQueue - The priority queue to use for the available SUnits.
   SchedulingPriorityQueue *AvailableQueue;
 
@@ -80,9 +89,9 @@ private:
 
 public:
   ScheduleDAGRRList(MachineFunction &mf,
-                    bool isbottomup,
+                    bool isbottomup, bool needlatency,
                     SchedulingPriorityQueue *availqueue)
-    : ScheduleDAGSDNodes(mf), isBottomUp(isbottomup),
+    : ScheduleDAGSDNodes(mf), isBottomUp(isbottomup), NeedLatency(needlatency),
       AvailableQueue(availqueue), Topo(SUnits) {
     }
 
@@ -161,9 +170,11 @@ private:
     return NewNode;
   }
 
-  /// ForceUnitLatencies - Return true, since register-pressure-reducing
-  /// scheduling doesn't need actual latency information.
-  bool ForceUnitLatencies() const { return true; }
+  /// ForceUnitLatencies - Register-pressure-reducing scheduling doesn't
+  /// need actual latency information but the hybrid scheduler does.
+  bool ForceUnitLatencies() const {
+    return !NeedLatency;
+  }
 };
 }  // end anonymous namespace
 
@@ -213,6 +224,12 @@ void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
 #endif
   --PredSU->NumSuccsLeft;
 
+  if (!ForceUnitLatencies()) {
+    // Updating predecessor's height. This is now the cycle when the
+    // predecessor can be scheduled without causing a pipeline stall.
+    PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency());
+  }
+
   // If all the node's successors are scheduled, this node is ready
   // to be scheduled. Ignore the special EntrySU node.
   if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
@@ -244,10 +261,15 @@ void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU, unsigned CurCycle) {
 /// count of its predecessors. If a predecessor pending count is zero, add it to
 /// the Available queue.
 void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
-  DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
+  DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
   DEBUG(SU->dump(this));
 
-  assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!");
+#ifndef NDEBUG
+  if (CurCycle < SU->getHeight())
+    DEBUG(dbgs() << "   Height [" << SU->getHeight() << "] pipeline stall!\n");
+#endif
+
+  // FIXME: Handle noop hazard.
   SU->setHeightToAtLeast(CurCycle);
   Sequence.push_back(SU);
 
@@ -339,6 +361,7 @@ void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, unsigned BtCycle,
       SU->isAvailable = false;
     UnscheduleNodeBottomUp(OldSU);
     --CurCycle;
+    AvailableQueue->setCurCycle(CurCycle);
   }
 
   assert(!SU->isSucc(OldSU) && "Something is wrong!");
@@ -386,7 +409,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
     if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
       return NULL;
 
-    DEBUG(dbgs() << "Unfolding SU # " << SU->NodeNum << "\n");
+    DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");
     assert(NewNodes.size() == 2 && "Expected a load folding node!");
 
     N = NewNodes[1];
@@ -504,7 +527,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
     SU = NewSU;
   }
 
-  DEBUG(dbgs() << "Duplicating SU # " << SU->NodeNum << "\n");
+  DEBUG(dbgs() << "    Duplicating SU #" << SU->NodeNum << "\n");
   NewSU = CreateClone(SU);
 
   // New SUnit has the exact same predecessors.
@@ -786,7 +809,7 @@ void ScheduleDAGRRList::ListScheduleBottomUp() {
           // Issue copies, these can be expensive cross register class copies.
           SmallVector<SUnit*, 2> Copies;
           InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
-          DEBUG(dbgs() << "Adding an edge from SU #" << TrySU->NodeNum
+          DEBUG(dbgs() << "    Adding an edge from SU #" << TrySU->NodeNum
                        << " to SU #" << Copies.front()->NodeNum << "\n");
           AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
                               /*Reg=*/0, /*isNormalMemory=*/false,
@@ -795,7 +818,7 @@ void ScheduleDAGRRList::ListScheduleBottomUp() {
           NewDef = Copies.back();
         }
 
-        DEBUG(dbgs() << "Adding an edge from SU #" << NewDef->NodeNum
+        DEBUG(dbgs() << "    Adding an edge from SU #" << NewDef->NodeNum
                      << " to SU #" << TrySU->NodeNum << "\n");
         LiveRegDefs[Reg] = NewDef;
         AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
@@ -821,6 +844,7 @@ void ScheduleDAGRRList::ListScheduleBottomUp() {
     if (CurSU)
       ScheduleNodeBottomUp(CurSU, CurCycle);
     ++CurCycle;
+    AvailableQueue->setCurCycle(CurCycle);
   }
 
   // Reverse the order if it is bottom up.
@@ -889,6 +913,7 @@ void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
 /// schedulers.
 void ScheduleDAGRRList::ListScheduleTopDown() {
   unsigned CurCycle = 0;
+  AvailableQueue->setCurCycle(CurCycle);
 
   // Release any successors of the special Entry node.
   ReleaseSuccessors(&EntrySU);
@@ -911,6 +936,7 @@ void ScheduleDAGRRList::ListScheduleTopDown() {
     if (CurSU)
       ScheduleNodeTopDown(CurSU, CurCycle);
     ++CurCycle;
+    AvailableQueue->setCurCycle(CurCycle);
   }
   
 #ifndef NDEBUG
@@ -956,6 +982,16 @@ namespace {
     
     bool operator()(const SUnit* left, const SUnit* right) const;
   };
+
+  struct hybrid_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
+    RegReductionPriorityQueue<hybrid_ls_rr_sort> *SPQ;
+    hybrid_ls_rr_sort(RegReductionPriorityQueue<hybrid_ls_rr_sort> *spq)
+      : SPQ(spq) {}
+    hybrid_ls_rr_sort(const hybrid_ls_rr_sort &RHS)
+      : SPQ(RHS.SPQ) {}
+    
+    bool operator()(const SUnit* left, const SUnit* right) const;
+  };
 }  // end anonymous namespace
 
 /// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
@@ -990,8 +1026,9 @@ CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
 namespace {
   template<class SF>
   class RegReductionPriorityQueue : public SchedulingPriorityQueue {
-    PriorityQueue<SUnit*, std::vector<SUnit*>, SF> Queue;
-    unsigned currentQueueId;
+    std::vector<SUnit*> Queue;
+    SF Picker;
+    unsigned CurQueueId;
 
   protected:
     // SUnits - The SUnits for the current graph.
@@ -1007,7 +1044,7 @@ namespace {
   public:
     RegReductionPriorityQueue(const TargetInstrInfo *tii,
                               const TargetRegisterInfo *tri)
-      : Queue(SF(this)), currentQueueId(0),
+      : Picker(this), CurQueueId(0),
         TII(tii), TRI(tri), scheduleDAG(NULL) {}
     
     void initNodes(std::vector<SUnit> &sunits) {
@@ -1067,26 +1104,26 @@ namespace {
     unsigned getNodeOrdering(const SUnit *SU) const {
       return scheduleDAG->DAG->GetOrdering(SU->getNode());
     }
-    
-    unsigned size() const { return Queue.size(); }
 
     bool empty() const { return Queue.empty(); }
     
     void push(SUnit *U) {
       assert(!U->NodeQueueId && "Node in the queue already");
-      U->NodeQueueId = ++currentQueueId;
-      Queue.push(U);
+      U->NodeQueueId = ++CurQueueId;
+      Queue.push_back(U);
     }
 
-    void push_all(const std::vector<SUnit *> &Nodes) {
-      for (unsigned i = 0, e = Nodes.size(); i != e; ++i)
-        push(Nodes[i]);
-    }
-    
     SUnit *pop() {
       if (empty()) return NULL;
-      SUnit *V = Queue.top();
-      Queue.pop();
+      std::vector<SUnit *>::iterator Best = Queue.begin();
+      for (std::vector<SUnit *>::iterator I = next(Queue.begin()),
+           E = Queue.end(); I != E; ++I)
+        if (Picker(*Best, *I))
+          Best = I;
+      SUnit *V = *Best;
+      if (Best != prior(Queue.end()))
+        std::swap(*Best, Queue.back());
+      Queue.pop_back();
       V->NodeQueueId = 0;
       return V;
     }
@@ -1094,7 +1131,11 @@ namespace {
     void remove(SUnit *SU) {
       assert(!Queue.empty() && "Queue is empty!");
       assert(SU->NodeQueueId != 0 && "Not in queue!");
-      Queue.erase_one(SU);
+      std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(),
+                                                   SU);
+      if (I != prior(Queue.end()))
+        std::swap(*I, Queue.back());
+      Queue.pop_back();
       SU->NodeQueueId = 0;
     }
 
@@ -1117,6 +1158,9 @@ namespace {
 
   typedef RegReductionPriorityQueue<src_ls_rr_sort>
     SrcRegReductionPriorityQueue;
+
+  typedef RegReductionPriorityQueue<hybrid_ls_rr_sort>
+    HybridBURRPriorityQueue;
 }
 
 /// closestSucc - Returns the scheduled cycle of the successor which is
@@ -1203,7 +1247,7 @@ bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
 }
 
 // Source order, otherwise bottom up.
-bool src_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const{
+bool src_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
   unsigned LOrder = SPQ->getNodeOrdering(left);
   unsigned ROrder = SPQ->getNodeOrdering(right);
 
@@ -1215,6 +1259,25 @@ bool src_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const{
   return BURRSort(left, right, SPQ);
 }
 
+bool hybrid_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const{
+  bool LStall = left->SchedulingPref == Sched::Latency &&
+    SPQ->getCurCycle() < left->getHeight();
+  bool RStall = right->SchedulingPref == Sched::Latency &&
+    SPQ->getCurCycle() < right->getHeight();
+  // If scheduling one of the node will cause a pipeline stall, delay it.
+  // If scheduling either one of the node will cause a pipeline stall, sort them
+  // according to their height.
+  // If neither will cause a pipeline stall, try to reduce register pressure.
+  if (LStall) {
+    if (!RStall)
+      return true;
+    if (left->getHeight() != right->getHeight())
+      return left->getHeight() > right->getHeight();
+  } else if (RStall)
+      return false;
+  return BURRSort(left, right, SPQ);
+}
+
 template<class SF>
 bool
 RegReductionPriorityQueue<SF>::canClobber(const SUnit *SU, const SUnit *Op) {
@@ -1379,8 +1442,8 @@ void RegReductionPriorityQueue<SF>::PrescheduleNodesWithMultipleUses() {
 
     // Ok, the transformation is safe and the heuristics suggest it is
     // profitable. Update the graph.
-    DEBUG(dbgs() << "Prescheduling SU # " << SU->NodeNum
-                 << " next to PredSU # " << PredSU->NodeNum
+    DEBUG(dbgs() << "    Prescheduling SU #" << SU->NodeNum
+                 << " next to PredSU #" << PredSU->NodeNum
                  << " to guide scheduling in the presence of multiple uses\n");
     for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
       SDep Edge = PredSU->Succs[i];
@@ -1469,7 +1532,7 @@ void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() {
              (hasCopyToRegUse(SU) && !hasCopyToRegUse(SuccSU)) ||
              (!SU->isCommutable && SuccSU->isCommutable)) &&
             !scheduleDAG->IsReachable(SuccSU, SU)) {
-          DEBUG(dbgs() << "Adding a pseudo-two-addr edge from SU # "
+          DEBUG(dbgs() << "    Adding a pseudo-two-addr edge from SU #"
                        << SU->NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
           scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Order, /*Latency=*/0,
                                         /*Reg=*/0, /*isNormalMemory=*/false,
@@ -1563,8 +1626,7 @@ llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
   
   BURegReductionPriorityQueue *PQ = new BURegReductionPriorityQueue(TII, TRI);
 
-  ScheduleDAGRRList *SD =
-    new ScheduleDAGRRList(*IS->MF, true, PQ);
+  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, false, PQ);
   PQ->setScheduleDAG(SD);
   return SD;  
 }
@@ -1577,8 +1639,7 @@ llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
   
   TDRegReductionPriorityQueue *PQ = new TDRegReductionPriorityQueue(TII, TRI);
 
-  ScheduleDAGRRList *SD =
-    new ScheduleDAGRRList(*IS->MF, false, PQ);
+  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, false, PQ);
   PQ->setScheduleDAG(SD);
   return SD;
 }
@@ -1591,8 +1652,20 @@ llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
   
   SrcRegReductionPriorityQueue *PQ = new SrcRegReductionPriorityQueue(TII, TRI);
 
-  ScheduleDAGRRList *SD =
-    new ScheduleDAGRRList(*IS->MF, true, PQ);
+  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, false, PQ);
+  PQ->setScheduleDAG(SD);
+  return SD;  
+}
+
+llvm::ScheduleDAGSDNodes *
+llvm::createHybridListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
+  const TargetMachine &TM = IS->TM;
+  const TargetInstrInfo *TII = TM.getInstrInfo();
+  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+  
+  HybridBURRPriorityQueue *PQ = new HybridBURRPriorityQueue(TII, TRI);
+
+  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, true, PQ);
   PQ->setScheduleDAG(SD);
   return SD;  
 }
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index 76e4771..3185c88 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -19,6 +19,7 @@
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Target/TargetSubtarget.h"
 #include "llvm/ADT/DenseMap.h"
@@ -44,6 +45,24 @@ void ScheduleDAGSDNodes::Run(SelectionDAG *dag, MachineBasicBlock *bb,
   ScheduleDAG::Run(bb, insertPos);
 }
 
+/// NewSUnit - Creates a new SUnit and return a ptr to it.
+///
+SUnit *ScheduleDAGSDNodes::NewSUnit(SDNode *N) {
+#ifndef NDEBUG
+  const SUnit *Addr = 0;
+  if (!SUnits.empty())
+    Addr = &SUnits[0];
+#endif
+  SUnits.push_back(SUnit(N, (unsigned)SUnits.size()));
+  assert((Addr == 0 || Addr == &SUnits[0]) &&
+         "SUnits std::vector reallocated on the fly!");
+  SUnits.back().OrigNode = &SUnits.back();
+  SUnit *SU = &SUnits.back();
+  const TargetLowering &TLI = DAG->getTargetLoweringInfo();
+  SU->SchedulingPref = TLI.getSchedulingPreference(N);
+  return SU;
+}
+
 SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) {
   SUnit *SU = NewSUnit(Old->getNode());
   SU->OrigNode = Old->OrigNode;
@@ -52,6 +71,7 @@ SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) {
   SU->isCommutable = Old->isCommutable;
   SU->hasPhysRegDefs = Old->hasPhysRegDefs;
   SU->hasPhysRegClobbers = Old->hasPhysRegClobbers;
+  SU->SchedulingPref = Old->SchedulingPref;
   Old->isCloned = true;
   return SU;
 }
@@ -217,9 +237,6 @@ void ScheduleDAGSDNodes::BuildSchedUnits() {
   // This is a temporary workaround.
   SUnits.reserve(NumNodes * 2);
   
-  // Check to see if the scheduler cares about latencies.
-  bool UnitLatencies = ForceUnitLatencies();
-
   // Add all nodes in depth first order.
   SmallVector<SDNode*, 64> Worklist;
   SmallPtrSet<SDNode*, 64> Visited;
@@ -282,10 +299,7 @@ void ScheduleDAGSDNodes::BuildSchedUnits() {
     N->setNodeId(NodeSUnit->NodeNum);
 
     // Assign the Latency field of NodeSUnit using target-provided information.
-    if (UnitLatencies)
-      NodeSUnit->Latency = 1;
-    else
-      ComputeLatency(NodeSUnit);
+    ComputeLatency(NodeSUnit);
   }
 }
 
@@ -353,7 +367,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
         const SDep& dep = SDep(OpSU, isChain ? SDep::Order : SDep::Data,
                                OpSU->Latency, PhysReg);
         if (!isChain && !UnitLatencies) {
-          ComputeOperandLatency(OpSU, SU, const_cast<SDep &>(dep));
+          ComputeOperandLatency(OpN, N, i, const_cast<SDep &>(dep));
           ST.adjustSchedDependency(OpSU, SU, const_cast<SDep &>(dep));
         }
 
@@ -377,7 +391,17 @@ void ScheduleDAGSDNodes::BuildSchedGraph(AliasAnalysis *AA) {
 }
 
 void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) {
+  // Check to see if the scheduler cares about latencies.
+  if (ForceUnitLatencies()) {
+    SU->Latency = 1;
+    return;
+  }
+
   const InstrItineraryData &InstrItins = TM.getInstrItineraryData();
+  if (InstrItins.isEmpty()) {
+    SU->Latency = 1;
+    return;
+  }
   
   // Compute the latency for the node.  We use the sum of the latencies for
   // all nodes flagged together into this SUnit.
@@ -389,6 +413,37 @@ void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) {
     }
 }
 
+void ScheduleDAGSDNodes::ComputeOperandLatency(SDNode *Def, SDNode *Use,
+                                               unsigned OpIdx, SDep& dep) const{
+  // Check to see if the scheduler cares about latencies.
+  if (ForceUnitLatencies())
+    return;
+
+  const InstrItineraryData &InstrItins = TM.getInstrItineraryData();
+  if (InstrItins.isEmpty())
+    return;
+  
+  if (dep.getKind() != SDep::Data)
+    return;
+
+  unsigned DefIdx = Use->getOperand(OpIdx).getResNo();
+  if (Def->isMachineOpcode() && Use->isMachineOpcode()) {
+    const TargetInstrDesc &II = TII->get(Def->getMachineOpcode());
+    if (DefIdx >= II.getNumDefs())
+      return;
+    int DefCycle = InstrItins.getOperandCycle(II.getSchedClass(), DefIdx);
+    if (DefCycle < 0)
+      return;
+    const unsigned UseClass = TII->get(Use->getMachineOpcode()).getSchedClass();
+    int UseCycle = InstrItins.getOperandCycle(UseClass, OpIdx);
+    if (UseCycle >= 0) {
+      int Latency = DefCycle - UseCycle + 1;
+      if (Latency >= 0)
+        dep.setLatency(Latency);
+    }
+  }
+}
+
 void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const {
   if (!SU->getNode()) {
     dbgs() << "PHYS REG COPY\n";
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
index 7ae8ec2..e8714ba 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
@@ -66,18 +66,7 @@ namespace llvm {
 
     /// NewSUnit - Creates a new SUnit and return a ptr to it.
     ///
-    SUnit *NewSUnit(SDNode *N) {
-#ifndef NDEBUG
-      const SUnit *Addr = 0;
-      if (!SUnits.empty())
-        Addr = &SUnits[0];
-#endif
-      SUnits.push_back(SUnit(N, (unsigned)SUnits.size()));
-      assert((Addr == 0 || Addr == &SUnits[0]) &&
-             "SUnits std::vector reallocated on the fly!");
-      SUnits.back().OrigNode = &SUnits.back();
-      return &SUnits.back();
-    }
+    SUnit *NewSUnit(SDNode *N);
 
     /// Clone - Creates a clone of the specified SUnit. It does not copy the
     /// predecessors / successors info nor the temporary scheduling states.
@@ -94,6 +83,15 @@ namespace llvm {
     ///
     virtual void ComputeLatency(SUnit *SU);
 
+    /// ComputeOperandLatency - Override dependence edge latency using
+    /// operand use/def information
+    ///
+    virtual void ComputeOperandLatency(SUnit *Def, SUnit *Use,
+                                       SDep& dep) const { }
+
+    virtual void ComputeOperandLatency(SDNode *Def, SDNode *Use,
+                                       unsigned OpIdx, SDep& dep) const;
+
     virtual MachineBasicBlock *EmitSchedule();
 
     /// Schedule - Order nodes according to selected style, filling
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index e6df742..38bf68b 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -15,6 +15,7 @@
 #include "SDNodeOrdering.h"
 #include "SDNodeDbgValue.h"
 #include "llvm/Constants.h"
+#include "llvm/Analysis/DebugInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Function.h"
 #include "llvm/GlobalAlias.h"
@@ -32,6 +33,7 @@
 #include "llvm/Target/TargetData.h"
 #include "llvm/Target/TargetFrameInfo.h"
 #include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetSelectionDAGInfo.h"
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetIntrinsicInfo.h"
@@ -789,7 +791,8 @@ unsigned SelectionDAG::getEVTAlignment(EVT VT) const {
 
 // EntryNode could meaningfully have debug info if we can find it...
 SelectionDAG::SelectionDAG(const TargetMachine &tm, FunctionLoweringInfo &fli)
-  : TM(tm), TLI(*tm.getTargetLowering()), FLI(fli),
+  : TM(tm), TLI(*tm.getTargetLowering()), TSI(*tm.getSelectionDAGInfo()),
+    FLI(fli),
     EntryNode(ISD::EntryToken, DebugLoc(), getVTList(MVT::Other)),
     Root(getEntryNode()), Ordering(0) {
   AllNodes.push_back(&EntryNode);
@@ -963,8 +966,18 @@ SDValue SelectionDAG::getConstantFP(double Val, EVT VT, bool isTarget) {
   EVT EltVT = VT.getScalarType();
   if (EltVT==MVT::f32)
     return getConstantFP(APFloat((float)Val), VT, isTarget);
-  else
+  else if (EltVT==MVT::f64)
     return getConstantFP(APFloat(Val), VT, isTarget);
+  else if (EltVT==MVT::f80 || EltVT==MVT::f128) {
+    bool ignored;
+    APFloat apf = APFloat(Val);
+    apf.convert(*EVTToAPFloatSemantics(EltVT), APFloat::rmNearestTiesToEven,
+                &ignored);
+    return getConstantFP(apf, VT, isTarget);
+  } else {
+    assert(0 && "Unsupported type in getConstantFP");
+    return SDValue();
+  }
 }
 
 SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV,
@@ -2614,7 +2627,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
     }
     break;
   case ISD::AND:
-    assert(VT.isInteger() && N1.getValueType() == N2.getValueType() &&
+    assert(VT.isInteger() && "This operator does not apply to FP types!");
+    assert(N1.getValueType() == N2.getValueType() &&
            N1.getValueType() == VT && "Binary operator types must match!");
     // (X & 0) -> 0.  This commonly occurs when legalizing i64 values, so it's
     // worth handling here.
@@ -2627,7 +2641,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
   case ISD::XOR:
   case ISD::ADD:
   case ISD::SUB:
-    assert(VT.isInteger() && N1.getValueType() == N2.getValueType() &&
+    assert(VT.isInteger() && "This operator does not apply to FP types!");
+    assert(N1.getValueType() == N2.getValueType() &&
            N1.getValueType() == VT && "Binary operator types must match!");
     // (X ^|+- 0) -> X.  This commonly occurs when legalizing i64 values, so
     // it's worth handling here.
@@ -2642,7 +2657,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
   case ISD::SDIV:
   case ISD::SREM:
     assert(VT.isInteger() && "This operator does not apply to FP types!");
-    // fall through
+    assert(N1.getValueType() == N2.getValueType() &&
+           N1.getValueType() == VT && "Binary operator types must match!");
+    break;
   case ISD::FADD:
   case ISD::FSUB:
   case ISD::FMUL:
@@ -2665,6 +2682,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
             return N1;
       }
     }
+    assert(VT.isFloatingPoint() && "This operator only applies to FP types!");
     assert(N1.getValueType() == N2.getValueType() &&
            N1.getValueType() == VT && "Binary operator types must match!");
     break;
@@ -3525,7 +3543,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst,
   // Then check to see if we should lower the memcpy with target-specific
   // code. If the target chooses to do this, this is the next best.
   SDValue Result =
-    TLI.EmitTargetCodeForMemcpy(*this, dl, Chain, Dst, Src, Size, Align,
+    TSI.EmitTargetCodeForMemcpy(*this, dl, Chain, Dst, Src, Size, Align,
                                 isVol, AlwaysInline,
                                 DstSV, DstSVOff, SrcSV, SrcSVOff);
   if (Result.getNode())
@@ -3590,7 +3608,7 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst,
   // Then check to see if we should lower the memmove with target-specific
   // code. If the target chooses to do this, this is the next best.
   SDValue Result =
-    TLI.EmitTargetCodeForMemmove(*this, dl, Chain, Dst, Src, Size, Align, isVol,
+    TSI.EmitTargetCodeForMemmove(*this, dl, Chain, Dst, Src, Size, Align, isVol,
                                  DstSV, DstSVOff, SrcSV, SrcSVOff);
   if (Result.getNode())
     return Result;
@@ -3641,7 +3659,7 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst,
   // Then check to see if we should lower the memset with target-specific
   // code. If the target chooses to do this, this is the next best.
   SDValue Result =
-    TLI.EmitTargetCodeForMemset(*this, dl, Chain, Dst, Src, Size, Align, isVol,
+    TSI.EmitTargetCodeForMemset(*this, dl, Chain, Dst, Src, Size, Align, isVol,
                                 DstSV, DstSVOff);
   if (Result.getNode())
     return Result;
@@ -5417,6 +5435,8 @@ const EVT *SDNode::getValueTypeList(EVT VT) {
     sys::SmartScopedLock<true> Lock(*VTMutex);
     return &(*EVTs->insert(VT).first);
   } else {
+    assert(VT.getSimpleVT().SimpleTy < MVT::LAST_VALUETYPE &&
+           "Value type out of range!");
     return &SimpleVTArray->VTs[VT.getSimpleVT().SimpleTy];
   }
 }
@@ -5607,6 +5627,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
   case ISD::LSDAADDR: return "LSDAADDR";
   case ISD::EHSELECTION: return "EHSELECTION";
   case ISD::EH_RETURN: return "EH_RETURN";
+  case ISD::EH_SJLJ_SETJMP: return "EH_SJLJ_SETJMP";
+  case ISD::EH_SJLJ_LONGJMP: return "EH_SJLJ_LONGJMP";
   case ISD::ConstantPool:  return "ConstantPool";
   case ISD::ExternalSymbol: return "ExternalSymbol";
   case ISD::BlockAddress:  return "BlockAddress";
@@ -6008,6 +6030,21 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
 
   if (getNodeId() != -1)
     OS << " [ID=" << getNodeId() << ']';
+
+  DebugLoc dl = getDebugLoc();
+  if (G && !dl.isUnknown()) {
+    DIScope
+      Scope(dl.getScope(G->getMachineFunction().getFunction()->getContext()));
+    OS << " dbg:";
+    // Omit the directory, since it's usually long and uninteresting.
+    if (Scope.Verify())
+      OS << Scope.getFilename();
+    else
+      OS << "<unknown>";
+    OS << ':' << dl.getLine();
+    if (dl.getCol() != 0)
+      OS << ':' << dl.getCol();
+  }
 }
 
 void SDNode::print(raw_ostream &OS, const SelectionDAG *G) const {
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index a38b204..fbe601f 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3726,6 +3726,12 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const DbgValueInst &DI,
   return true;
 }
 
+// VisualStudio defines setjmp as _setjmp
+#if defined(_MSC_VER) && defined(setjmp)
+#define setjmp_undefined_for_visual_studio
+#undef setjmp
+#endif
+
 /// visitIntrinsicCall - Lower the call to the specified intrinsic function.  If
 /// we want to emit this as a call to a named external function, return the name
 /// otherwise lower it and return null.
@@ -3818,7 +3824,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
   }
   case Intrinsic::dbg_declare: {
     const DbgDeclareInst &DI = cast<DbgDeclareInst>(I);
-    if (!DIDescriptor::ValidDebugInfo(DI.getVariable(), CodeGenOpt::None))
+    if (!DIVariable(DI.getVariable()).Verify())
       return 0;
 
     MDNode *Variable = DI.getVariable();
@@ -3881,7 +3887,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
   }
   case Intrinsic::dbg_value: {
     const DbgValueInst &DI = cast<DbgValueInst>(I);
-    if (!DIDescriptor::ValidDebugInfo(DI.getVariable(), CodeGenOpt::None))
+    if (!DIVariable(DI.getVariable()).Verify())
       return 0;
 
     MDNode *Variable = DI.getVariable();
@@ -3900,6 +3906,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
       SDV = DAG.getDbgValue(Variable, V, Offset, dl, SDNodeOrder);
       DAG.AddDbgValue(SDV, 0, false);
     } else {
+      bool createUndef = false;
+      // FIXME : Why not use getValue() directly ?
       SDValue &N = NodeMap[V];
       if (N.getNode()) {
         if (!EmitFuncArgumentDbgValue(DI, V, Variable, Offset, N)) {
@@ -3907,7 +3915,19 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
                                 N.getResNo(), Offset, dl, SDNodeOrder);
           DAG.AddDbgValue(SDV, N.getNode(), false);
         }
-      } else {
+      } else if (isa<PHINode>(V) && !V->use_empty()) {
+        SDValue N = getValue(V);
+        if (N.getNode()) {
+          if (!EmitFuncArgumentDbgValue(DI, V, Variable, Offset, N)) {
+            SDV = DAG.getDbgValue(Variable, N.getNode(),
+                                  N.getResNo(), Offset, dl, SDNodeOrder);
+            DAG.AddDbgValue(SDV, N.getNode(), false);
+          }
+        } else
+          createUndef = true;
+      } else
+        createUndef = true;
+      if (createUndef) {
         // We may expand this to cover more cases.  One case where we have no
         // data available is an unreferenced parameter; we need this fallback.
         SDV = DAG.getDbgValue(Variable, UndefValue::get(V->getType()),
@@ -4018,6 +4038,17 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
     MMI.setCurrentCallSite(CI->getZExtValue());
     return 0;
   }
+  case Intrinsic::eh_sjlj_setjmp: {
+    setValue(&I, DAG.getNode(ISD::EH_SJLJ_SETJMP, dl, MVT::i32, getRoot(),
+                             getValue(I.getOperand(1))));
+    return 0;
+  }
+  case Intrinsic::eh_sjlj_longjmp: {
+    DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_LONGJMP, dl, MVT::Other,
+                            getRoot(),
+                            getValue(I.getOperand(1))));
+    return 0;
+  }
 
   case Intrinsic::convertff:
   case Intrinsic::convertfsi:
@@ -4924,7 +4955,7 @@ isAllocatableRegister(unsigned Reg, MachineFunction &MF,
 namespace llvm {
 /// AsmOperandInfo - This contains information for each constraint that we are
 /// lowering.
-class VISIBILITY_HIDDEN SDISelAsmOperandInfo :
+class LLVM_LIBRARY_VISIBILITY SDISelAsmOperandInfo :
     public TargetLowering::AsmOperandInfo {
 public:
   /// CallOperand - If this is the result output operand or a clobber
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 422cb7a..65b8d4f 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -25,9 +25,11 @@
 #include "llvm/Intrinsics.h"
 #include "llvm/IntrinsicInst.h"
 #include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
 #include "llvm/CodeGen/FastISel.h"
 #include "llvm/CodeGen/GCStrategy.h"
 #include "llvm/CodeGen/GCMetadata.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
@@ -131,11 +133,13 @@ namespace llvm {
 
     if (OptLevel == CodeGenOpt::None)
       return createFastDAGScheduler(IS, OptLevel);
-    if (TLI.getSchedulingPreference() == TargetLowering::SchedulingForLatency)
+    if (TLI.getSchedulingPreference() == Sched::Latency)
       return createTDListDAGScheduler(IS, OptLevel);
-    assert(TLI.getSchedulingPreference() ==
-           TargetLowering::SchedulingForRegPressure && "Unknown sched type!");
-    return createBURRListDAGScheduler(IS, OptLevel);
+    if (TLI.getSchedulingPreference() == Sched::RegPressure)
+      return createBURRListDAGScheduler(IS, OptLevel);
+    assert(TLI.getSchedulingPreference() == Sched::Hybrid &&
+           "Unknown sched type!");
+    return createHybridListDAGScheduler(IS, OptLevel);
   }
 }
 
@@ -188,6 +192,39 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
   MachineFunctionPass::getAnalysisUsage(AU);
 }
 
+/// FunctionCallsSetJmp - Return true if the function has a call to setjmp or
+/// other function that gcc recognizes as "returning twice". This is used to
+/// limit code-gen optimizations on the machine function.
+///
+/// FIXME: Remove after <rdar://problem/8031714> is fixed.
+static bool FunctionCallsSetJmp(const Function *F) {
+  const Module *M = F->getParent();
+  static const char *ReturnsTwiceFns[] = {
+    "setjmp",
+    "sigsetjmp",
+    "setjmp_syscall",
+    "savectx",
+    "qsetjmp",
+    "vfork",
+    "getcontext"
+  };
+#define NUM_RETURNS_TWICE_FNS sizeof(ReturnsTwiceFns) / sizeof(const char *)
+
+  for (unsigned I = 0; I < NUM_RETURNS_TWICE_FNS; ++I)
+    if (const Function *Callee = M->getFunction(ReturnsTwiceFns[I])) {
+      if (!Callee->use_empty())
+        for (Value::const_use_iterator
+               I = Callee->use_begin(), E = Callee->use_end();
+             I != E; ++I)
+          if (const CallInst *CI = dyn_cast<CallInst>(I))
+            if (CI->getParent()->getParent() == F)
+              return true;
+    }
+
+  return false;
+#undef NUM_RETURNS_TWICE_FNS
+}
+
 bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
   // Do some sanity-checking on the command-line options.
   assert((!EnableFastISelVerbose || EnableFastISel) &&
@@ -218,6 +255,13 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
   MachineBasicBlock *EntryMBB = MF->begin();
   RegInfo->EmitLiveInCopies(EntryMBB, TRI, TII);
 
+  DenseMap<unsigned, unsigned> LiveInMap;
+  if (!FuncInfo->ArgDbgValues.empty())
+    for (MachineRegisterInfo::livein_iterator LI = RegInfo->livein_begin(),
+           E = RegInfo->livein_end(); LI != E; ++LI)
+      if (LI->second) 
+        LiveInMap.insert(std::make_pair(LI->first, LI->second));
+
   // Insert DBG_VALUE instructions for function arguments to the entry block.
   for (unsigned i = 0, e = FuncInfo->ArgDbgValues.size(); i != e; ++i) {
     MachineInstr *MI = FuncInfo->ArgDbgValues[e-i-1];
@@ -230,8 +274,44 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
       // FIXME: VR def may not be in entry block.
       Def->getParent()->insert(llvm::next(InsertPos), MI);
     }
+
+    // If Reg is live-in then update debug info to track its copy in a vreg.
+    DenseMap<unsigned, unsigned>::iterator LDI = LiveInMap.find(Reg);
+    if (LDI != LiveInMap.end()) {
+      MachineInstr *Def = RegInfo->getVRegDef(LDI->second);
+      MachineBasicBlock::iterator InsertPos = Def;
+      const MDNode *Variable = 
+        MI->getOperand(MI->getNumOperands()-1).getMetadata();
+      unsigned Offset = MI->getOperand(1).getImm();
+      // Def is never a terminator here, so it is ok to increment InsertPos.
+      BuildMI(*EntryMBB, ++InsertPos, MI->getDebugLoc(), 
+              TII.get(TargetOpcode::DBG_VALUE))
+        .addReg(LDI->second, RegState::Debug)
+        .addImm(Offset).addMetadata(Variable);
+    }
+  }
+
+  // Determine if there are any calls in this machine function.
+  MachineFrameInfo *MFI = MF->getFrameInfo();
+  if (!MFI->hasCalls()) {
+    for (MachineFunction::const_iterator
+           I = MF->begin(), E = MF->end(); I != E; ++I) {
+      const MachineBasicBlock *MBB = I;
+      for (MachineBasicBlock::const_iterator
+             II = MBB->begin(), IE = MBB->end(); II != IE; ++II) {
+        const TargetInstrDesc &TID = TM.getInstrInfo()->get(II->getOpcode());
+        if (II->isInlineAsm() || (TID.isCall() && !TID.isReturn())) {
+          MFI->setHasCalls(true);
+          goto done;
+        }
+      }
+    }
+  done:;
   }
 
+  // Determine if there is a call to setjmp in the machine function.
+  MF->setCallsSetJmp(FunctionCallsSetJmp(&Fn));
+
   // Release function-specific state. SDB and CurDAG are already cleared
   // at this point.
   FuncInfo->clear();
@@ -662,6 +742,7 @@ void SelectionDAGISel::DoInstructionSelection() {
     
     CurDAG->setRoot(Dummy.getValue());
   }    
+
   DEBUG(errs() << "===== Instruction selection ends:\n");
 
   PostprocessISelDAG();
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 8a4a1b1..44a80d3 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -18,7 +18,6 @@
 #include "llvm/Target/TargetLoweringObjectFile.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtarget.h"
 #include "llvm/GlobalVariable.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
@@ -544,7 +543,7 @@ TargetLowering::TargetLowering(const TargetMachine &tm,
   ExceptionPointerRegister = 0;
   ExceptionSelectorRegister = 0;
   BooleanContents = UndefinedBooleanContent;
-  SchedPreferenceInfo = SchedulingForLatency;
+  SchedPreferenceInfo = Sched::Latency;
   JumpBufSize = 0;
   JumpBufAlignment = 0;
   IfCvtBlockSizeLimit = 2;
@@ -2417,7 +2416,7 @@ std::pair<unsigned, const TargetRegisterClass*> TargetLowering::
 getRegForInlineAsmConstraint(const std::string &Constraint,
                              EVT VT) const {
   if (Constraint[0] != '{')
-    return std::pair<unsigned, const TargetRegisterClass*>(0, 0);
+    return std::make_pair(0u, static_cast<TargetRegisterClass*>(0));
   assert(*(Constraint.end()-1) == '}' && "Not a brace enclosed constraint?");
 
   // Remove the braces from around the name.
@@ -2449,7 +2448,7 @@ getRegForInlineAsmConstraint(const std::string &Constraint,
     }
   }
   
-  return std::pair<unsigned, const TargetRegisterClass*>(0, 0);
+  return std::make_pair(0u, static_cast<const TargetRegisterClass*>(0));
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp b/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp
index d20477f..a081e3c 100644
--- a/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp
@@ -12,9 +12,11 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Target/TargetSelectionDAGInfo.h"
+#include "llvm/Target/TargetMachine.h"
 using namespace llvm;
 
-TargetSelectionDAGInfo::TargetSelectionDAGInfo() {
+TargetSelectionDAGInfo::TargetSelectionDAGInfo(const TargetMachine &TM)
+  : TD(TM.getTargetData()) {
 }
 
 TargetSelectionDAGInfo::~TargetSelectionDAGInfo() {
diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp
index 1f68a6f..ed3c243 100644
--- a/lib/CodeGen/SimpleRegisterCoalescing.cpp
+++ b/lib/CodeGen/SimpleRegisterCoalescing.cpp
@@ -460,7 +460,7 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA,
     unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
     if (!tii_->isMoveInstr(*UseMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx))
       continue;
-    if (DstReg == IntB.reg) {
+    if (DstReg == IntB.reg && DstSubIdx == 0) {
       // This copy will become a noop. If it's defining a new val#,
       // remove that val# as well. However this live range is being
       // extended to the end of the existing live range defined by the copy.
@@ -624,9 +624,10 @@ SimpleRegisterCoalescing::TrimLiveIntervalToLastUse(SlotIndex CopyIdx,
     LR->valno->addKill(LastUseIdx.getDefIndex());
     unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
     if (tii_->isMoveInstr(*LastUseMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) &&
-        DstReg == li.reg) {
+        DstReg == li.reg && DstSubIdx == 0) {
       // Last use is itself an identity code.
-      int DeadIdx = LastUseMI->findRegisterDefOperandIdx(li.reg, false, tri_);
+      int DeadIdx = LastUseMI->findRegisterDefOperandIdx(li.reg,
+                                                         false, false, tri_);
       LastUseMI->getOperand(DeadIdx).setIsDead();
     }
     return true;
@@ -810,6 +811,8 @@ SimpleRegisterCoalescing::UpdateRegDefsUses(unsigned SrcReg, unsigned DstReg,
       unsigned CopySrcReg, CopyDstReg, CopySrcSubIdx, CopyDstSubIdx;
       if (tii_->isMoveInstr(*UseMI, CopySrcReg, CopyDstReg,
                             CopySrcSubIdx, CopyDstSubIdx) &&
+          CopySrcSubIdx == 0 &&
+          CopyDstSubIdx == 0 &&
           CopySrcReg != CopyDstReg &&
           CopySrcReg == SrcReg && CopyDstReg != UseDstReg) {
         // If the use is a copy and it won't be coalesced away, and its source
@@ -835,8 +838,13 @@ SimpleRegisterCoalescing::UpdateRegDefsUses(unsigned SrcReg, unsigned DstReg,
                     UseMI->isRegTiedToDefOperand(&O-&UseMI->getOperand(0))))
           UseMI->addRegisterKilled(DstReg, tri_, true);
       }
-      DEBUG(dbgs() << "\t\tupdated: " << li_->getInstructionIndex(UseMI)
-                   << "\t" << *UseMI);
+
+      DEBUG({
+          dbgs() << "\t\tupdated: ";
+          if (!UseMI->isDebugValue())
+            dbgs() << li_->getInstructionIndex(UseMI) << "\t";
+          dbgs() << *UseMI;
+        });
       continue;
     }
 
@@ -845,14 +853,21 @@ SimpleRegisterCoalescing::UpdateRegDefsUses(unsigned SrcReg, unsigned DstReg,
     // EAX: 1 -> AL, 2 -> AX
     // So RAX's sub-register 2 is AX, RAX's sub-regsiter 3 is EAX, whose
     // sub-register 2 is also AX.
+    //
+    // FIXME: Properly compose subreg indices for all targets.
+    //
     if (SubIdx && OldSubIdx && SubIdx != OldSubIdx)
-      assert(OldSubIdx < SubIdx && "Conflicting sub-register index!");
+      ;
     else if (SubIdx)
       O.setSubReg(SubIdx);
     O.setReg(DstReg);
 
-    DEBUG(dbgs() << "\t\tupdated: " << li_->getInstructionIndex(UseMI)
-                 << "\t" << *UseMI);
+    DEBUG({
+        dbgs() << "\t\tupdated: ";
+        if (!UseMI->isDebugValue())
+          dbgs() << li_->getInstructionIndex(UseMI) << "\t";
+        dbgs() << *UseMI;
+      });
 
     // After updating the operand, check if the machine instruction has
     // become a copy. If so, update its val# information.
@@ -938,7 +953,7 @@ static void PropagateDeadness(LiveInterval &li, MachineInstr *CopyMI,
   MachineInstr *DefMI =
     li_->getInstructionFromIndex(LRStart.getDefIndex());
   if (DefMI && DefMI != CopyMI) {
-    int DeadIdx = DefMI->findRegisterDefOperandIdx(li.reg, false);
+    int DeadIdx = DefMI->findRegisterDefOperandIdx(li.reg);
     if (DeadIdx != -1)
       DefMI->getOperand(DeadIdx).setIsDead();
     else
@@ -1255,7 +1270,12 @@ SimpleRegisterCoalescing::CanJoinExtractSubRegToPhysReg(unsigned DstReg,
                                                unsigned &RealDstReg) {
   const TargetRegisterClass *RC = mri_->getRegClass(SrcReg);
   RealDstReg = tri_->getMatchingSuperReg(DstReg, SubIdx, RC);
-  assert(RealDstReg && "Invalid extract_subreg instruction!");
+  if (!RealDstReg) {
+    DEBUG(dbgs() << "\tIncompatible source regclass: "
+                 << "none of the super-registers of " << tri_->getName(DstReg)
+                 << " are in " << RC->getName() << ".\n");
+    return false;
+  }
 
   LiveInterval &RHS = li_->getInterval(SrcReg);
   // For this type of EXTRACT_SUBREG, conservatively
@@ -1293,7 +1313,12 @@ SimpleRegisterCoalescing::CanJoinInsertSubRegToPhysReg(unsigned DstReg,
                                                unsigned &RealSrcReg) {
   const TargetRegisterClass *RC = mri_->getRegClass(DstReg);
   RealSrcReg = tri_->getMatchingSuperReg(SrcReg, SubIdx, RC);
-  assert(RealSrcReg && "Invalid extract_subreg instruction!");
+  if (!RealSrcReg) {
+    DEBUG(dbgs() << "\tIncompatible destination regclass: "
+                 << "none of the super-registers of " << tri_->getName(SrcReg)
+                 << " are in " << RC->getName() << ".\n");
+    return false;
+  }
 
   LiveInterval &LHS = li_->getInterval(DstReg);
   if (li_->hasInterval(RealSrcReg) &&
@@ -1419,7 +1444,8 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
     assert(DstSubRC && "Illegal subregister index");
     if (!DstSubRC->contains(SrcSubReg)) {
       DEBUG(dbgs() << "\tIncompatible destination regclass: "
-                   << tri_->getName(SrcSubReg) << " not in "
+                   << "none of the super-registers of "
+                   << tri_->getName(SrcSubReg) << " are in "
                    << DstSubRC->getName() << ".\n");
       return false;             // Not coalescable.
     }
@@ -1436,7 +1462,8 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
     assert(SrcSubRC && "Illegal subregister index");
     if (!SrcSubRC->contains(DstSubReg)) {
       DEBUG(dbgs() << "\tIncompatible source regclass: "
-                   << tri_->getName(DstSubReg) << " not in "
+                   << "none of the super-registers of "
+                   << tri_->getName(DstSubReg) << " are in "
                    << SrcSubRC->getName() << ".\n");
       (void)DstSubReg;
       return false;             // Not coalescable.
@@ -2625,7 +2652,7 @@ SimpleRegisterCoalescing::lastRegisterUse(SlotIndex Start,
       MachineInstr *UseMI = Use.getParent();
       unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
       if (tii_->isMoveInstr(*UseMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) &&
-          SrcReg == DstReg)
+          SrcReg == DstReg && SrcSubIdx == DstSubIdx)
         // Ignore identity copies.
         continue;
       SlotIndex Idx = li_->getInstructionIndex(UseMI);
@@ -2654,7 +2681,7 @@ SimpleRegisterCoalescing::lastRegisterUse(SlotIndex Start,
     // Ignore identity copies.
     unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
     if (!(tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) &&
-          SrcReg == DstReg))
+          SrcReg == DstReg && SrcSubIdx == DstSubIdx))
       for (unsigned i = 0, NumOps = MI->getNumOperands(); i != NumOps; ++i) {
         MachineOperand &Use = MI->getOperand(i);
         if (Use.isReg() && Use.isUse() && Use.getReg() &&
@@ -2785,7 +2812,7 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) {
 
       // If the move will be an identity move delete it
       bool isMove= tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx);
-      if (isMove && SrcReg == DstReg) {
+      if (isMove && SrcReg == DstReg && SrcSubIdx == DstSubIdx) {
         if (li_->hasInterval(SrcReg)) {
           LiveInterval &RegInt = li_->getInterval(SrcReg);
           // If def of this move instruction is dead, remove its live range
diff --git a/lib/CodeGen/Spiller.cpp b/lib/CodeGen/Spiller.cpp
index 63c5554..a7b2efe 100644
--- a/lib/CodeGen/Spiller.cpp
+++ b/lib/CodeGen/Spiller.cpp
@@ -51,6 +51,7 @@ protected:
   MachineFrameInfo *mfi;
   MachineRegisterInfo *mri;
   const TargetInstrInfo *tii;
+  const TargetRegisterInfo *tri;
   VirtRegMap *vrm;
   
   /// Construct a spiller base. 
@@ -60,6 +61,7 @@ protected:
     mfi = mf->getFrameInfo();
     mri = &mf->getRegInfo();
     tii = mf->getTarget().getInstrInfo();
+    tri = mf->getTarget().getRegisterInfo();
   }
 
   /// Add spill ranges for every use/def of the live interval, inserting loads
@@ -129,7 +131,8 @@ protected:
       // Insert reload if necessary.
       MachineBasicBlock::iterator miItr(mi);
       if (hasUse) {
-        tii->loadRegFromStackSlot(*mi->getParent(), miItr, newVReg, ss, trc);
+        tii->loadRegFromStackSlot(*mi->getParent(), miItr, newVReg, ss, trc,
+                                  tri);
         MachineInstr *loadInstr(prior(miItr));
         SlotIndex loadIndex =
           lis->InsertMachineInstrInMaps(loadInstr).getDefIndex();
@@ -142,8 +145,8 @@ protected:
 
       // Insert store if necessary.
       if (hasDef) {
-        tii->storeRegToStackSlot(*mi->getParent(), llvm::next(miItr), newVReg, true,
-                                 ss, trc);
+        tii->storeRegToStackSlot(*mi->getParent(), llvm::next(miItr), newVReg,
+                                 true, ss, trc, tri);
         MachineInstr *storeInstr(llvm::next(miItr));
         SlotIndex storeIndex =
           lis->InsertMachineInstrInMaps(storeInstr).getDefIndex();
@@ -333,7 +336,8 @@ private:
       // Insert a copy at the start of the MBB. The range proceeding the
       // copy will be attached to the original LiveInterval.
       MachineBasicBlock *defMBB = lis->getMBBFromIndex(newVNI->def);
-      tii->copyRegToReg(*defMBB, defMBB->begin(), newVReg, li->reg, trc, trc);
+      tii->copyRegToReg(*defMBB, defMBB->begin(), newVReg, li->reg, trc, trc,
+                        DebugLoc());
       MachineInstr *copyMI = defMBB->begin();
       copyMI->addRegisterKilled(li->reg, tri);
       SlotIndex copyIdx = lis->InsertMachineInstrInMaps(copyMI);
@@ -386,7 +390,8 @@ private:
 
       if (isTwoAddr && !twoAddrUseIsUndef) {
         MachineBasicBlock *defMBB = defInst->getParent();
-        tii->copyRegToReg(*defMBB, defInst, newVReg, li->reg, trc, trc);
+        tii->copyRegToReg(*defMBB, defInst, newVReg, li->reg, trc, trc,
+                          DebugLoc());
         MachineInstr *copyMI = prior(MachineBasicBlock::iterator(defInst));
         SlotIndex copyIdx = lis->InsertMachineInstrInMaps(copyMI);
         copyMI->addRegisterKilled(li->reg, tri);
@@ -446,8 +451,9 @@ private:
         // reg.
         MachineBasicBlock *useMBB = useInst->getParent();
         MachineBasicBlock::iterator useItr(useInst);
-        tii->copyRegToReg(*useMBB, next(useItr), li->reg, newVReg, trc, trc);
-        MachineInstr *copyMI = next(useItr);
+        tii->copyRegToReg(*useMBB, llvm::next(useItr), li->reg, newVReg, trc, trc,
+                          DebugLoc());
+        MachineInstr *copyMI = llvm::next(useItr);
         copyMI->addRegisterKilled(newVReg, tri);
         SlotIndex copyIdx = lis->InsertMachineInstrInMaps(copyMI);
 
@@ -483,7 +489,8 @@ private:
         assert(oldKillRange != 0 && "No kill range?");
 
         tii->copyRegToReg(*killMBB, killMBB->getFirstTerminator(),
-                          li->reg, newVReg, trc, trc);
+                          li->reg, newVReg, trc, trc,
+                          DebugLoc());
         MachineInstr *copyMI = prior(killMBB->getFirstTerminator());
         copyMI->addRegisterKilled(newVReg, tri);
         SlotIndex copyIdx = lis->InsertMachineInstrInMaps(copyMI);
diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp
index 42dfd7f..7f3b452 100644
--- a/lib/CodeGen/StackSlotColoring.cpp
+++ b/lib/CodeGen/StackSlotColoring.cpp
@@ -13,6 +13,8 @@
 
 #define DEBUG_TYPE "stackcoloring"
 #include "VirtRegMap.h"
+#include "llvm/Function.h"
+#include "llvm/Module.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
 #include "llvm/CodeGen/LiveStackAnalysis.h"
@@ -116,6 +118,7 @@ namespace {
 
   private:
     void InitializeSlots();
+    bool CheckForSetJmpCall(const MachineFunction &MF) const;
     void ScanForSpillSlotRefs(MachineFunction &MF);
     bool OverlapWithAssignments(LiveInterval *li, int Color) const;
     int ColorSlot(LiveInterval *li);
@@ -607,7 +610,8 @@ StackSlotColoring::UnfoldAndRewriteInstruction(MachineInstr *MI, int OldFI,
       DEBUG(MI->dump());
       ++NumLoadElim;
     } else {
-      TII->copyRegToReg(*MBB, MI, DstReg, Reg, RC, RC);
+      TII->copyRegToReg(*MBB, MI, DstReg, Reg, RC, RC,
+                        MI->getDebugLoc());
       ++NumRegRepl;
     }
 
@@ -623,7 +627,8 @@ StackSlotColoring::UnfoldAndRewriteInstruction(MachineInstr *MI, int OldFI,
       DEBUG(MI->dump());
       ++NumStoreElim;
     } else {
-      TII->copyRegToReg(*MBB, MI, Reg, SrcReg, RC, RC);
+      TII->copyRegToReg(*MBB, MI, Reg, SrcReg, RC, RC,
+                        MI->getDebugLoc());
       ++NumRegRepl;
     }
 
@@ -697,7 +702,11 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) {
 
 
 bool StackSlotColoring::runOnMachineFunction(MachineFunction &MF) {
-  DEBUG(dbgs() << "********** Stack Slot Coloring **********\n");
+  DEBUG({
+      dbgs() << "********** Stack Slot Coloring **********\n"
+             << "********** Function: " 
+             << MF.getFunction()->getName() << '\n';
+    });
 
   MFI = MF.getFrameInfo();
   MRI = &MF.getRegInfo(); 
@@ -716,6 +725,13 @@ bool StackSlotColoring::runOnMachineFunction(MachineFunction &MF) {
       return false;
   }
 
+  // If there are calls to setjmp or sigsetjmp, don't perform stack slot
+  // coloring. The stack could be modified before the longjmp is executed,
+  // resulting in the wrong value being used afterwards. (See
+  // <rdar://problem/8007500>.)
+  if (MF.callsSetJmp())
+    return false;
+
   // Gather spill slot references
   ScanForSpillSlotRefs(MF);
   InitializeSlots();
diff --git a/lib/CodeGen/StrongPHIElimination.cpp b/lib/CodeGen/StrongPHIElimination.cpp
index f8f6a55..142398c 100644
--- a/lib/CodeGen/StrongPHIElimination.cpp
+++ b/lib/CodeGen/StrongPHIElimination.cpp
@@ -696,7 +696,7 @@ void StrongPHIElimination::ScheduleCopies(MachineBasicBlock* MBB,
         // the Phi defining curr.second
         MachineBasicBlock::iterator PI = MRI.getVRegDef(curr.second);
         TII->copyRegToReg(*PI->getParent(), PI, t,
-                          curr.second, RC, RC);
+                          curr.second, RC, RC, DebugLoc());
         
         DEBUG(dbgs() << "Inserted copy from " << curr.second << " to " << t
                      << "\n");
@@ -713,7 +713,7 @@ void StrongPHIElimination::ScheduleCopies(MachineBasicBlock* MBB,
       
       // Insert copy from map[curr.first] to curr.second
       TII->copyRegToReg(*MBB, MBB->getFirstTerminator(), curr.second,
-                        map[curr.first], RC, RC);
+                        map[curr.first], RC, RC, DebugLoc());
       map[curr.first] = curr.second;
       DEBUG(dbgs() << "Inserted copy from " << curr.first << " to "
                    << curr.second << "\n");
@@ -762,7 +762,7 @@ void StrongPHIElimination::ScheduleCopies(MachineBasicBlock* MBB,
         // Insert a copy from dest to a new temporary t at the end of b
         unsigned t = MF->getRegInfo().createVirtualRegister(RC);
         TII->copyRegToReg(*MBB, MBB->getFirstTerminator(), t,
-                          curr.second, RC, RC);
+                          curr.second, RC, RC, DebugLoc());
         map[curr.second] = t;
         
         MachineBasicBlock::iterator TI = MBB->getFirstTerminator();
@@ -961,7 +961,7 @@ bool StrongPHIElimination::runOnMachineFunction(MachineFunction &Fn) {
           const TargetInstrInfo *TII = Fn.getTarget().getInstrInfo();
           const TargetRegisterClass *RC = Fn.getRegInfo().getRegClass(I->first);
           TII->copyRegToReg(*SI->second, SI->second->getFirstTerminator(),
-                            I->first, SI->first, RC, RC);
+                            I->first, SI->first, RC, RC, DebugLoc());
           
           LI.renumber();
           
diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp
index aa6e2b4..f2e2a76 100644
--- a/lib/CodeGen/TailDuplication.cpp
+++ b/lib/CodeGen/TailDuplication.cpp
@@ -561,7 +561,7 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF,
     for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) {
       const TargetRegisterClass *RC = MRI->getRegClass(CopyInfos[i].first);
       TII->copyRegToReg(*PredBB, Loc, CopyInfos[i].first,
-                        CopyInfos[i].second, RC,RC);
+                        CopyInfos[i].second, RC,RC, DebugLoc());
       MachineInstr *CopyMI = prior(Loc);
       Copies.push_back(CopyMI);
     }
@@ -620,7 +620,7 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF,
       for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) {
         const TargetRegisterClass *RC = MRI->getRegClass(CopyInfos[i].first);
         TII->copyRegToReg(*PrevBB, Loc, CopyInfos[i].first,
-                          CopyInfos[i].second, RC, RC);
+                          CopyInfos[i].second, RC, RC, DebugLoc());
         MachineInstr *CopyMI = prior(Loc);
         Copies.push_back(CopyMI);
       }
diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index 9f95993..71ad3fb 100644
--- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -22,6 +22,7 @@
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCSectionMachO.h"
 #include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSectionCOFF.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Target/Mangler.h"
 #include "llvm/Target/TargetData.h"
@@ -460,6 +461,26 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx,
     = getContext().getMachOSection("__DATA", "__data", 0,
                                    SectionKind::getDataRel());
 
+  TLSDataSection // .tdata
+    = getContext().getMachOSection("__DATA", "__thread_data",
+                                   MCSectionMachO::S_THREAD_LOCAL_REGULAR,
+                                   SectionKind::getDataRel());
+  TLSBSSSection // .tbss
+    = getContext().getMachOSection("__DATA", "__thread_bss",
+                                   MCSectionMachO::S_THREAD_LOCAL_ZEROFILL,
+                                   SectionKind::getThreadBSS());
+                                   
+  // TODO: Verify datarel below.
+  TLSTLVSection // .tlv
+    = getContext().getMachOSection("__DATA", "__thread_vars",
+                                   MCSectionMachO::S_THREAD_LOCAL_VARIABLES,
+                                   SectionKind::getDataRel());
+                                   
+  TLSThreadInitSection
+    = getContext().getMachOSection("__DATA", "__thread_init",
+                          MCSectionMachO::S_THREAD_LOCAL_INIT_FUNCTION_POINTERS,
+                          SectionKind::getDataRel());
+                                   
   CStringSection // .cstring
     = getContext().getMachOSection("__TEXT", "__cstring", 
                                    MCSectionMachO::S_CSTRING_LITERALS,
@@ -606,6 +627,8 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx,
     getContext().getMachOSection("__DWARF", "__debug_inlined",
                                  MCSectionMachO::S_ATTR_DEBUG,
                                  SectionKind::getMetadata());
+                                 
+  TLSExtraDataSection = TLSTLVSection;
 }
 
 const MCSection *TargetLoweringObjectFileMachO::
@@ -646,7 +669,10 @@ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
 const MCSection *TargetLoweringObjectFileMachO::
 SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
                        Mangler *Mang, const TargetMachine &TM) const {
-  assert(!Kind.isThreadLocal() && "Darwin doesn't support TLS");
+  
+  // Handle thread local data.
+  if (Kind.isThreadBSS()) return TLSBSSSection;
+  if (Kind.isThreadData()) return TLSDataSection;
 
   if (Kind.isText())
     return GV->isWeakForLinker() ? TextCoalSection : TextSection;
@@ -794,94 +820,160 @@ unsigned TargetLoweringObjectFileMachO::getTTypeEncoding() const {
 //                                  COFF
 //===----------------------------------------------------------------------===//
 
-typedef StringMap<const MCSectionCOFF*> COFFUniqueMapTy;
-
-TargetLoweringObjectFileCOFF::~TargetLoweringObjectFileCOFF() {
-  delete (COFFUniqueMapTy*)UniquingMap;
-}
-
-
-const MCSection *TargetLoweringObjectFileCOFF::
-getCOFFSection(StringRef Name, bool isDirective, SectionKind Kind) const {
-  // Create the map if it doesn't already exist.
-  if (UniquingMap == 0)
-    UniquingMap = new COFFUniqueMapTy();
-  COFFUniqueMapTy &Map = *(COFFUniqueMapTy*)UniquingMap;
-
-  // Do the lookup, if we have a hit, return it.
-  const MCSectionCOFF *&Entry = Map[Name];
-  if (Entry) return Entry;
-
-  return Entry = MCSectionCOFF::Create(Name, isDirective, Kind, getContext());
-}
-
 void TargetLoweringObjectFileCOFF::Initialize(MCContext &Ctx,
                                               const TargetMachine &TM) {
-  if (UniquingMap != 0)
-    ((COFFUniqueMapTy*)UniquingMap)->clear();
   TargetLoweringObjectFile::Initialize(Ctx, TM);
-  TextSection = getCOFFSection("\t.text", true, SectionKind::getText());
-  DataSection = getCOFFSection("\t.data", true, SectionKind::getDataRel());
+  TextSection =
+    getContext().getCOFFSection(".text",
+                                MCSectionCOFF::IMAGE_SCN_CNT_CODE |
+                                MCSectionCOFF::IMAGE_SCN_MEM_EXECUTE |
+                                MCSectionCOFF::IMAGE_SCN_MEM_READ,
+                                SectionKind::getText());
+  DataSection =
+    getContext().getCOFFSection(".data",
+                                MCSectionCOFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+                                MCSectionCOFF::IMAGE_SCN_MEM_READ |
+                                MCSectionCOFF::IMAGE_SCN_MEM_WRITE,
+                                SectionKind::getDataRel());
+  ReadOnlySection =
+    getContext().getCOFFSection(".rdata",
+                                MCSectionCOFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+                                MCSectionCOFF::IMAGE_SCN_MEM_READ,
+                                SectionKind::getReadOnly());
   StaticCtorSection =
-    getCOFFSection(".ctors", false, SectionKind::getDataRel());
+    getContext().getCOFFSection(".ctors",
+                                MCSectionCOFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+                                MCSectionCOFF::IMAGE_SCN_MEM_READ |
+                                MCSectionCOFF::IMAGE_SCN_MEM_WRITE,
+                                SectionKind::getDataRel());
   StaticDtorSection =
-    getCOFFSection(".dtors", false, SectionKind::getDataRel());
+    getContext().getCOFFSection(".dtors",
+                                MCSectionCOFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+                                MCSectionCOFF::IMAGE_SCN_MEM_READ |
+                                MCSectionCOFF::IMAGE_SCN_MEM_WRITE,
+                                SectionKind::getDataRel());
 
   // FIXME: We're emitting LSDA info into a readonly section on COFF, even
   // though it contains relocatable pointers.  In PIC mode, this is probably a
   // big runtime hit for C++ apps.  Either the contents of the LSDA need to be
   // adjusted or this should be a data section.
   LSDASection =
-    getCOFFSection(".gcc_except_table", false, SectionKind::getReadOnly());
+    getContext().getCOFFSection(".gcc_except_table",
+                                MCSectionCOFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+                                MCSectionCOFF::IMAGE_SCN_MEM_READ,
+                                SectionKind::getReadOnly());
   EHFrameSection =
-    getCOFFSection(".eh_frame", false, SectionKind::getDataRel());
+    getContext().getCOFFSection(".eh_frame",
+                                MCSectionCOFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+                                MCSectionCOFF::IMAGE_SCN_MEM_READ |
+                                MCSectionCOFF::IMAGE_SCN_MEM_WRITE,
+                                SectionKind::getDataRel());
 
   // Debug info.
-  // FIXME: Don't use 'directive' mode here.
   DwarfAbbrevSection =
-    getCOFFSection("\t.section\t.debug_abbrev,\"dr\"",
-                   true, SectionKind::getMetadata());
+    getContext().getCOFFSection(".debug_abbrev",
+                                MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE |
+                                MCSectionCOFF::IMAGE_SCN_MEM_READ,
+                                SectionKind::getMetadata());
   DwarfInfoSection =
-    getCOFFSection("\t.section\t.debug_info,\"dr\"",
-                   true, SectionKind::getMetadata());
+    getContext().getCOFFSection(".debug_info",
+                                MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE |
+                                MCSectionCOFF::IMAGE_SCN_MEM_READ,
+                                SectionKind::getMetadata());
   DwarfLineSection =
-    getCOFFSection("\t.section\t.debug_line,\"dr\"",
-                   true, SectionKind::getMetadata());
+    getContext().getCOFFSection(".debug_line",
+                                MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE |
+                                MCSectionCOFF::IMAGE_SCN_MEM_READ,
+                                SectionKind::getMetadata());
   DwarfFrameSection =
-    getCOFFSection("\t.section\t.debug_frame,\"dr\"",
-                   true, SectionKind::getMetadata());
+    getContext().getCOFFSection(".debug_frame",
+                                MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE |
+                                MCSectionCOFF::IMAGE_SCN_MEM_READ,
+                                SectionKind::getMetadata());
   DwarfPubNamesSection =
-    getCOFFSection("\t.section\t.debug_pubnames,\"dr\"",
-                   true, SectionKind::getMetadata());
+    getContext().getCOFFSection(".debug_pubnames",
+                                MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE |
+                                MCSectionCOFF::IMAGE_SCN_MEM_READ,
+                                SectionKind::getMetadata());
   DwarfPubTypesSection =
-    getCOFFSection("\t.section\t.debug_pubtypes,\"dr\"",
-                   true, SectionKind::getMetadata());
+    getContext().getCOFFSection(".debug_pubtypes",
+                                MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE |
+                                MCSectionCOFF::IMAGE_SCN_MEM_READ,
+                                SectionKind::getMetadata());
   DwarfStrSection =
-    getCOFFSection("\t.section\t.debug_str,\"dr\"",
-                   true, SectionKind::getMetadata());
+    getContext().getCOFFSection(".debug_str",
+                                MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE |
+                                MCSectionCOFF::IMAGE_SCN_MEM_READ,
+                                SectionKind::getMetadata());
   DwarfLocSection =
-    getCOFFSection("\t.section\t.debug_loc,\"dr\"",
-                   true, SectionKind::getMetadata());
+    getContext().getCOFFSection(".debug_loc",
+                                MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE |
+                                MCSectionCOFF::IMAGE_SCN_MEM_READ,
+                                SectionKind::getMetadata());
   DwarfARangesSection =
-    getCOFFSection("\t.section\t.debug_aranges,\"dr\"",
-                   true, SectionKind::getMetadata());
+    getContext().getCOFFSection(".debug_aranges",
+                                MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE |
+                                MCSectionCOFF::IMAGE_SCN_MEM_READ,
+                                SectionKind::getMetadata());
   DwarfRangesSection =
-    getCOFFSection("\t.section\t.debug_ranges,\"dr\"",
-                   true, SectionKind::getMetadata());
+    getContext().getCOFFSection(".debug_ranges",
+                                MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE |
+                                MCSectionCOFF::IMAGE_SCN_MEM_READ,
+                                SectionKind::getMetadata());
   DwarfMacroInfoSection =
-    getCOFFSection("\t.section\t.debug_macinfo,\"dr\"",
-                   true, SectionKind::getMetadata());
+    getContext().getCOFFSection(".debug_macinfo",
+                                MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE |
+                                MCSectionCOFF::IMAGE_SCN_MEM_READ,
+                                SectionKind::getMetadata());
+
+  DrectveSection =
+    getContext().getCOFFSection(".drectve",
+                                MCSectionCOFF::IMAGE_SCN_LNK_INFO,
+                                SectionKind::getMetadata());
+}
+
+static unsigned
+getCOFFSectionFlags(SectionKind K) {
+  unsigned Flags = 0;
+
+  if (!K.isMetadata())
+    Flags |=
+      MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE;
+  else if (K.isText())
+    Flags |=
+      MCSectionCOFF::IMAGE_SCN_MEM_EXECUTE |
+      MCSectionCOFF::IMAGE_SCN_CNT_CODE;
+  else if (K.isBSS ())
+    Flags |=
+      MCSectionCOFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA |
+      MCSectionCOFF::IMAGE_SCN_MEM_READ |
+      MCSectionCOFF::IMAGE_SCN_MEM_WRITE;
+  else if (K.isReadOnly())
+    Flags |=
+      MCSectionCOFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+      MCSectionCOFF::IMAGE_SCN_MEM_READ;
+  else if (K.isWriteable())
+    Flags |=
+      MCSectionCOFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+      MCSectionCOFF::IMAGE_SCN_MEM_READ |
+      MCSectionCOFF::IMAGE_SCN_MEM_WRITE;
+
+  return Flags;
 }
 
 const MCSection *TargetLoweringObjectFileCOFF::
 getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
                          Mangler *Mang, const TargetMachine &TM) const {
-  return getCOFFSection(GV->getSection(), false, Kind);
+  return getContext().getCOFFSection(GV->getSection(),
+                                     getCOFFSectionFlags(Kind),
+                                     Kind);
 }
 
 static const char *getCOFFSectionPrefixForUniqueGlobal(SectionKind Kind) {
   if (Kind.isText())
     return ".text$linkonce";
+  if (Kind.isBSS ())
+    return ".bss$linkonce";
   if (Kind.isWriteable())
     return ".data$linkonce";
   return ".rdata$linkonce";
@@ -900,7 +992,13 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
     SmallString<128> Name(Prefix, Prefix+strlen(Prefix));
     MCSymbol *Sym = Mang->getSymbol(GV);
     Name.append(Sym->getName().begin(), Sym->getName().end());
-    return getCOFFSection(Name.str(), false, Kind);
+
+    unsigned Characteristics = getCOFFSectionFlags(Kind);
+
+    Characteristics |= MCSectionCOFF::IMAGE_SCN_LNK_COMDAT;
+
+    return getContext().getCOFFSection(Name.str(), Characteristics,
+                          MCSectionCOFF::IMAGE_COMDAT_SELECT_EXACT_MATCH, Kind);
   }
 
   if (Kind.isText())
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp
index c288ae0..3d10dc1 100644
--- a/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -40,6 +40,7 @@
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallSet.h"
@@ -77,6 +78,10 @@ namespace {
     // registers from virtual registers. e.g. r1 = move v1024.
     DenseMap<unsigned, unsigned> DstRegMap;
 
+    /// RegSequences - Keep track the list of REG_SEQUENCE instructions seen
+    /// during the initial walk of the machine function.
+    SmallVector<MachineInstr*, 16> RegSequences;
+
     bool Sink3AddrInstruction(MachineBasicBlock *MBB, MachineInstr *MI,
                               unsigned Reg,
                               MachineBasicBlock::iterator OldPos);
@@ -123,6 +128,13 @@ namespace {
     void ProcessCopy(MachineInstr *MI, MachineBasicBlock *MBB,
                      SmallPtrSet<MachineInstr*, 8> &Processed);
 
+    void CoalesceExtSubRegs(SmallVector<unsigned,4> &Srcs, unsigned DstReg);
+
+    /// EliminateRegSequences - Eliminate REG_SEQUENCE instructions as part
+    /// of the de-ssa process. This replaces sources of REG_SEQUENCE as
+    /// sub-register references of the register defined by REG_SEQUENCE.
+    bool EliminateRegSequences();
+
   public:
     static char ID; // Pass identification, replacement for typeid
     TwoAddressInstructionPass() : MachineFunctionPass(&ID) {}
@@ -768,7 +780,7 @@ canUpdateDeletedKills(SmallVector<unsigned, 4> &Kills,
     if (!LastKill)
       return false;
 
-    bool isModRef = LastKill->modifiesRegister(Kill);
+    bool isModRef = LastKill->definesRegister(Kill);
     NewKills.push_back(std::make_pair(std::make_pair(Kill, isModRef),
                                       LastKill));
   }
@@ -929,6 +941,10 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
         continue;
       }
 
+      // Remember REG_SEQUENCE instructions, we'll deal with them later.
+      if (mi->isRegSequence())
+        RegSequences.push_back(&*mi);
+
       const TargetInstrDesc &TID = mi->getDesc();
       bool FirstTied = true;
 
@@ -1035,7 +1051,8 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
             ReMatRegs.set(regB);
             ++NumReMats;
           } else {
-            bool Emitted = TII->copyRegToReg(*mbbi, mi, regA, regB, rc, rc);
+            bool Emitted = TII->copyRegToReg(*mbbi, mi, regA, regB, rc, rc,
+                                             mi->getDebugLoc());
             (void)Emitted;
             assert(Emitted && "Unable to issue a copy instruction!\n");
           }
@@ -1110,5 +1127,211 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
     VReg = ReMatRegs.find_next(VReg);
   }
 
+  // Eliminate REG_SEQUENCE instructions. Their whole purpose was to preseve
+  // SSA form. It's now safe to de-SSA.
+  MadeChange |= EliminateRegSequences();
+
   return MadeChange;
 }
+
+static void UpdateRegSequenceSrcs(unsigned SrcReg,
+                                  unsigned DstReg, unsigned SubIdx,
+                                  MachineRegisterInfo *MRI) {
+  for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(SrcReg),
+         RE = MRI->reg_end(); RI != RE; ) {
+    MachineOperand &MO = RI.getOperand();
+    ++RI;
+    MO.setReg(DstReg);
+    assert(MO.getSubReg() == 0);
+    MO.setSubReg(SubIdx);
+  }
+}
+
+/// CoalesceExtSubRegs - If a number of sources of the REG_SEQUENCE are
+/// EXTRACT_SUBREG from the same register and to the same virtual register
+/// with different sub-register indices, attempt to combine the
+/// EXTRACT_SUBREGs and pre-coalesce them. e.g.
+/// %reg1026<def> = VLDMQ %reg1025<kill>, 260, pred:14, pred:%reg0
+/// %reg1029:6<def> = EXTRACT_SUBREG %reg1026, 6
+/// %reg1029:5<def> = EXTRACT_SUBREG %reg1026<kill>, 5
+/// Since D subregs 5, 6 can combine to a Q register, we can coalesce
+/// reg1026 to reg1029.
+void
+TwoAddressInstructionPass::CoalesceExtSubRegs(SmallVector<unsigned,4> &Srcs,
+                                              unsigned DstReg) {
+  SmallSet<unsigned, 4> Seen;
+  for (unsigned i = 0, e = Srcs.size(); i != e; ++i) {
+    unsigned SrcReg = Srcs[i];
+    if (!Seen.insert(SrcReg))
+      continue;
+
+    // If there are no other uses than extract_subreg which feed into
+    // the reg_sequence, then we might be able to coalesce them.
+    bool CanCoalesce = true;
+    SmallVector<unsigned, 4> SubIndices;
+    for (MachineRegisterInfo::use_nodbg_iterator
+           UI = MRI->use_nodbg_begin(SrcReg),
+           UE = MRI->use_nodbg_end(); UI != UE; ++UI) {
+      MachineInstr *UseMI = &*UI;
+      if (!UseMI->isExtractSubreg() ||
+          UseMI->getOperand(0).getReg() != DstReg) {
+        CanCoalesce = false;
+        break;
+      }
+      SubIndices.push_back(UseMI->getOperand(2).getImm());
+    }
+
+    if (!CanCoalesce || SubIndices.size() < 2)
+      continue;
+
+    std::sort(SubIndices.begin(), SubIndices.end());
+    unsigned NewSubIdx = 0;
+    if (TRI->canCombinedSubRegIndex(MRI->getRegClass(SrcReg), SubIndices,
+                                    NewSubIdx)) {
+      bool Proceed = true;
+      if (NewSubIdx)
+        for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(SrcReg),
+               RE = MRI->reg_end(); RI != RE; ) {
+          MachineOperand &MO = RI.getOperand();
+          ++RI;
+          // FIXME: If the sub-registers do not combine to the whole
+          // super-register, i.e. NewSubIdx != 0, and any of the use has a
+          // sub-register index, then abort the coalescing attempt.
+          if (MO.getSubReg()) {
+            Proceed = false;
+            break;
+          }
+          MO.setReg(DstReg);
+          MO.setSubReg(NewSubIdx);
+        }
+      if (Proceed)
+        for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(SrcReg),
+               RE = MRI->reg_end(); RI != RE; ) {
+          MachineOperand &MO = RI.getOperand();
+          ++RI;
+          MO.setReg(DstReg);
+          if (NewSubIdx)
+            MO.setSubReg(NewSubIdx);
+        }
+      }
+  }
+}
+
+static bool HasOtherRegSequenceUses(unsigned Reg, MachineInstr *RegSeq,
+                                    MachineRegisterInfo *MRI) {
+  for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg),
+         UE = MRI->use_end(); UI != UE; ++UI) {
+    MachineInstr *UseMI = &*UI;
+    if (UseMI != RegSeq && UseMI->isRegSequence())
+      return true;
+  }
+  return false;
+}
+
+/// EliminateRegSequences - Eliminate REG_SEQUENCE instructions as part
+/// of the de-ssa process. This replaces sources of REG_SEQUENCE as
+/// sub-register references of the register defined by REG_SEQUENCE. e.g.
+///
+/// %reg1029<def>, %reg1030<def> = VLD1q16 %reg1024<kill>, ...
+/// %reg1031<def> = REG_SEQUENCE %reg1029<kill>, 5, %reg1030<kill>, 6
+/// =>
+/// %reg1031:5<def>, %reg1031:6<def> = VLD1q16 %reg1024<kill>, ...
+bool TwoAddressInstructionPass::EliminateRegSequences() {
+  if (RegSequences.empty())
+    return false;
+
+  for (unsigned i = 0, e = RegSequences.size(); i != e; ++i) {
+    MachineInstr *MI = RegSequences[i];
+    unsigned DstReg = MI->getOperand(0).getReg();
+    if (MI->getOperand(0).getSubReg() ||
+        TargetRegisterInfo::isPhysicalRegister(DstReg) ||
+        !(MI->getNumOperands() & 1)) {
+      DEBUG(dbgs() << "Illegal REG_SEQUENCE instruction:" << *MI);
+      llvm_unreachable(0);
+    }
+
+    bool IsImpDef = true;
+    SmallVector<unsigned, 4> RealSrcs;
+    SmallSet<unsigned, 4> Seen;
+    for (unsigned i = 1, e = MI->getNumOperands(); i < e; i += 2) {
+      unsigned SrcReg = MI->getOperand(i).getReg();
+      if (MI->getOperand(i).getSubReg() ||
+          TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
+        DEBUG(dbgs() << "Illegal REG_SEQUENCE instruction:" << *MI);
+        llvm_unreachable(0);
+      }
+
+      MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
+      if (DefMI->isImplicitDef()) {
+        DefMI->eraseFromParent();
+        continue;
+      }
+      IsImpDef = false;
+
+      // Remember EXTRACT_SUBREG sources. These might be candidate for
+      // coalescing.
+      if (DefMI->isExtractSubreg())
+        RealSrcs.push_back(DefMI->getOperand(1).getReg());
+
+      if (!Seen.insert(SrcReg) ||
+          MI->getParent() != DefMI->getParent() ||
+          !MI->getOperand(i).isKill() ||
+          HasOtherRegSequenceUses(SrcReg, MI, MRI)) {
+        // REG_SEQUENCE cannot have duplicated operands, add a copy.
+        // Also add an copy if the source is live-in the block. We don't want
+        // to end up with a partial-redef of a livein, e.g.
+        // BB0:
+        // reg1051:10<def> =
+        // ...
+        // BB1:
+        // ... = reg1051:10
+        // BB2:
+        // reg1051:9<def> =
+        // LiveIntervalAnalysis won't like it.
+        //
+        // If the REG_SEQUENCE doesn't kill its source, keeping live variables
+        // correctly up to date becomes very difficult. Insert a copy.
+        //
+        const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
+        unsigned NewReg = MRI->createVirtualRegister(RC);
+        MachineBasicBlock::iterator InsertLoc = MI;
+        bool Emitted =
+          TII->copyRegToReg(*MI->getParent(), InsertLoc, NewReg, SrcReg, RC, RC,
+                            MI->getDebugLoc());
+        (void)Emitted;
+        assert(Emitted && "Unable to issue a copy instruction!\n");
+        MI->getOperand(i).setReg(NewReg);
+        if (MI->getOperand(i).isKill()) {
+          MachineBasicBlock::iterator CopyMI = prior(InsertLoc);
+          MachineOperand *KillMO = CopyMI->findRegisterUseOperand(SrcReg);
+          KillMO->setIsKill();
+          if (LV)
+            // Update live variables
+            LV->replaceKillInstruction(SrcReg, MI, &*CopyMI);
+        }
+      }
+    }
+
+    for (unsigned i = 1, e = MI->getNumOperands(); i < e; i += 2) {
+      unsigned SrcReg = MI->getOperand(i).getReg();
+      unsigned SubIdx = MI->getOperand(i+1).getImm();
+      UpdateRegSequenceSrcs(SrcReg, DstReg, SubIdx, MRI);
+    }
+
+    if (IsImpDef) {
+      DEBUG(dbgs() << "Turned: " << *MI << " into an IMPLICIT_DEF");
+      MI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF));
+      for (int j = MI->getNumOperands() - 1, ee = 0; j > ee; --j)
+        MI->RemoveOperand(j);      
+    } else {
+      DEBUG(dbgs() << "Eliminated: " << *MI);
+      MI->eraseFromParent();
+    }
+
+    // Try coalescing some EXTRACT_SUBREG instructions.
+    CoalesceExtSubRegs(RealSrcs, DstReg);
+  }
+
+  RegSequences.clear();
+  return true;
+}
diff --git a/lib/CodeGen/VirtRegRewriter.cpp b/lib/CodeGen/VirtRegRewriter.cpp
index 7f0412c..871d836 100644
--- a/lib/CodeGen/VirtRegRewriter.cpp
+++ b/lib/CodeGen/VirtRegRewriter.cpp
@@ -907,7 +907,7 @@ unsigned ReuseInfo::GetRegForReload(const TargetRegisterClass *RC,
                         TRI, VRM);
         } else {
           TII->loadRegFromStackSlot(*MBB, InsertLoc, NewPhysReg,
-                                    NewOp.StackSlotOrReMat, AliasRC);
+                                    NewOp.StackSlotOrReMat, AliasRC, TRI);
           MachineInstr *LoadMI = prior(InsertLoc);
           VRM.addSpillSlotUse(NewOp.StackSlotOrReMat, LoadMI);
           // Any stores to this stack slot are not dead anymore.
@@ -1265,7 +1265,7 @@ OptimizeByUnfold2(unsigned VirtReg, int SS,
   ComputeReloadLoc(MII, MBB->begin(), PhysReg, TRI, false, SS, TII, MF);
 
   // Load from SS to the spare physical register.
-  TII->loadRegFromStackSlot(*MBB, MII, PhysReg, SS, RC);
+  TII->loadRegFromStackSlot(*MBB, MII, PhysReg, SS, RC, TRI);
   // This invalidates Phys.
   Spills.ClobberPhysReg(PhysReg);
   // Remember it's available.
@@ -1308,7 +1308,7 @@ OptimizeByUnfold2(unsigned VirtReg, int SS,
   } while (FoldsStackSlotModRef(*NextMII, SS, PhysReg, TII, TRI, *VRM));
 
   // Store the value back into SS.
-  TII->storeRegToStackSlot(*MBB, NextMII, PhysReg, true, SS, RC);
+  TII->storeRegToStackSlot(*MBB, NextMII, PhysReg, true, SS, RC, TRI);
   MachineInstr *StoreMI = prior(NextMII);
   VRM->addSpillSlotUse(SS, StoreMI);
   VRM->virtFolded(VirtReg, StoreMI, VirtRegMap::isMod);
@@ -1523,7 +1523,7 @@ CommuteToFoldReload(MachineBasicBlock::iterator &MII,
     VRM->virtFolded(VirtReg, FoldedMI, VirtRegMap::isRef);
     // Insert new def MI and spill MI.
     const TargetRegisterClass* RC = MRI->getRegClass(VirtReg);
-    TII->storeRegToStackSlot(*MBB, &MI, NewReg, true, SS, RC);
+    TII->storeRegToStackSlot(*MBB, &MI, NewReg, true, SS, RC, TRI);
     MII = prior(MII);
     MachineInstr *StoreMI = MII;
     VRM->addSpillSlotUse(SS, StoreMI);
@@ -1566,7 +1566,8 @@ SpillRegToStackSlot(MachineBasicBlock::iterator &MII,
                     std::vector<MachineOperand*> &KillOps) {
 
   MachineBasicBlock::iterator oldNextMII = llvm::next(MII);
-  TII->storeRegToStackSlot(*MBB, llvm::next(MII), PhysReg, true, StackSlot, RC);
+  TII->storeRegToStackSlot(*MBB, llvm::next(MII), PhysReg, true, StackSlot, RC,
+                           TRI);
   MachineInstr *StoreMI = prior(oldNextMII);
   VRM->addSpillSlotUse(StackSlot, StoreMI);
   DEBUG(dbgs() << "Store:\t" << *StoreMI);
@@ -1709,7 +1710,7 @@ bool LocalRewriter::InsertEmergencySpills(MachineInstr *MI) {
     if (UsedSS.count(SS))
       llvm_unreachable("Need to spill more than one physical registers!");
     UsedSS.insert(SS);
-    TII->storeRegToStackSlot(*MBB, MII, PhysReg, true, SS, RC);
+    TII->storeRegToStackSlot(*MBB, MII, PhysReg, true, SS, RC, TRI);
     MachineInstr *StoreMI = prior(MII);
     VRM->addSpillSlotUse(SS, StoreMI);
 
@@ -1718,7 +1719,7 @@ bool LocalRewriter::InsertEmergencySpills(MachineInstr *MI) {
       ComputeReloadLoc(llvm::next(MII), MBB->begin(), PhysReg, TRI, false, SS,
                        TII, *MBB->getParent());
 
-    TII->loadRegFromStackSlot(*MBB, InsertLoc, PhysReg, SS, RC);
+    TII->loadRegFromStackSlot(*MBB, InsertLoc, PhysReg, SS, RC, TRI);
 
     MachineInstr *LoadMI = prior(InsertLoc);
     VRM->addSpillSlotUse(SS, LoadMI);
@@ -1793,7 +1794,8 @@ bool LocalRewriter::InsertRestores(MachineInstr *MI,
         ComputeReloadLoc(MII, MBB->begin(), Phys, TRI, DoReMat, SSorRMId, TII,
                          *MBB->getParent());
 
-      TII->copyRegToReg(*MBB, InsertLoc, Phys, InReg, RC, RC);
+      TII->copyRegToReg(*MBB, InsertLoc, Phys, InReg, RC, RC,
+                        MI->getDebugLoc());
 
       // This invalidates Phys.
       Spills.ClobberPhysReg(Phys);
@@ -1821,7 +1823,7 @@ bool LocalRewriter::InsertRestores(MachineInstr *MI,
       ReMaterialize(*MBB, InsertLoc, Phys, VirtReg, TII, TRI, *VRM);
     } else {
       const TargetRegisterClass* RC = MRI->getRegClass(VirtReg);
-      TII->loadRegFromStackSlot(*MBB, InsertLoc, Phys, SSorRMId, RC);
+      TII->loadRegFromStackSlot(*MBB, InsertLoc, Phys, SSorRMId, RC, TRI);
       MachineInstr *LoadMI = prior(InsertLoc);
       VRM->addSpillSlotUse(SSorRMId, LoadMI);
       ++NumLoads;
@@ -1857,7 +1859,7 @@ bool LocalRewriter::InsertSpills(MachineInstr *MI) {
     int StackSlot = VRM->getStackSlot(VirtReg);
     MachineBasicBlock::iterator oldNextMII = llvm::next(MII);
     TII->storeRegToStackSlot(*MBB, llvm::next(MII), Phys, isKill, StackSlot,
-                             RC);
+                             RC, TRI);
     MachineInstr *StoreMI = prior(oldNextMII);
     VRM->addSpillSlotUse(StackSlot, StoreMI);
     DEBUG(dbgs() << "Store:\t" << *StoreMI);
@@ -1893,6 +1895,11 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs,
 
   // Clear kill info.
   SmallSet<unsigned, 2> KilledMIRegs;
+
+  // Keep track of the registers we have already spilled in case there are
+  // multiple defs of the same register in MI.
+  SmallSet<unsigned, 8> SpilledMIRegs;
+
   RegKills.reset();
   KillOps.clear();
   KillOps.resize(TRI->getNumRegs(), NULL);
@@ -2138,7 +2145,8 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs,
           ComputeReloadLoc(&MI, MBB->begin(), PhysReg, TRI, DoReMat,
                            SSorRMId, TII, MF);
 
-        TII->copyRegToReg(*MBB, InsertLoc, DesignatedReg, PhysReg, RC, RC);
+        TII->copyRegToReg(*MBB, InsertLoc, DesignatedReg, PhysReg, RC, RC,
+                          MI.getDebugLoc());
 
         MachineInstr *CopyMI = prior(InsertLoc);
         CopyMI->setAsmPrinterFlag(MachineInstr::ReloadReuse);
@@ -2183,7 +2191,7 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs,
           ReMaterialize(*MBB, InsertLoc, PhysReg, VirtReg, TII, TRI, *VRM);
         } else {
           const TargetRegisterClass* RC = MRI->getRegClass(VirtReg);
-          TII->loadRegFromStackSlot(*MBB, InsertLoc, PhysReg, SSorRMId, RC);
+          TII->loadRegFromStackSlot(*MBB, InsertLoc, PhysReg, SSorRMId, RC,TRI);
           MachineInstr *LoadMI = prior(InsertLoc);
           VRM->addSpillSlotUse(SSorRMId, LoadMI);
           ++NumLoads;
@@ -2262,7 +2270,8 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs,
             DEBUG(dbgs() << "Promoted Load To Copy: " << MI);
             if (DestReg != InReg) {
               const TargetRegisterClass *RC = MRI->getRegClass(VirtReg);
-              TII->copyRegToReg(*MBB, &MI, DestReg, InReg, RC, RC);
+              TII->copyRegToReg(*MBB, &MI, DestReg, InReg, RC, RC,
+                                MI.getDebugLoc());
               MachineOperand *DefMO = MI.findRegisterDefOperand(DestReg);
               unsigned SubIdx = DefMO->getSubReg();
               // Revisit the copy so we make sure to notice the effects of the
@@ -2408,6 +2417,7 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs,
     }
 
     // Process all of the spilled defs.
+    SpilledMIRegs.clear();
     for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
       MachineOperand &MO = MI.getOperand(i);
       if (!(MO.isReg() && MO.getReg() && MO.isDef()))
@@ -2421,7 +2431,8 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs,
         // eliminate this or else the undef marker is lost and it will
         // confuses the scavenger. This is extremely rare.
         unsigned Src, Dst, SrcSR, DstSR;
-        if (TII->isMoveInstr(MI, Src, Dst, SrcSR, DstSR) && Src == Dst &&
+        if (TII->isMoveInstr(MI, Src, Dst, SrcSR, DstSR) &&
+            Src == Dst && SrcSR == DstSR &&
             !MI.findRegisterUseOperand(Src)->isUndef()) {
           ++NumDCE;
           DEBUG(dbgs() << "Removing now-noop copy: " << MI);
@@ -2500,7 +2511,7 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs,
       MI.getOperand(i).setReg(RReg);
       MI.getOperand(i).setSubReg(0);
 
-      if (!MO.isDead()) {
+      if (!MO.isDead() && SpilledMIRegs.insert(VirtReg)) {
         MachineInstr *&LastStore = MaybeDeadStores[StackSlot];
         SpillRegToStackSlot(MII, -1, PhysReg, StackSlot, RC, true,
           LastStore, Spills, ReMatDefs, RegKills, KillOps);
@@ -2510,7 +2521,8 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs,
         // instruction before considering the dest reg to be changed.
         {
           unsigned Src, Dst, SrcSR, DstSR;
-          if (TII->isMoveInstr(MI, Src, Dst, SrcSR, DstSR) && Src == Dst) {
+          if (TII->isMoveInstr(MI, Src, Dst, SrcSR, DstSR) &&
+              Src == Dst && SrcSR == DstSR) {
             ++NumDCE;
             DEBUG(dbgs() << "Removing now-noop copy: " << MI);
             InvalidateKills(MI, TRI, RegKills, KillOps);
diff --git a/lib/CompilerDriver/Action.cpp b/lib/CompilerDriver/Action.cpp
index 9d07811..5f30dce 100644
--- a/lib/CompilerDriver/Action.cpp
+++ b/lib/CompilerDriver/Action.cpp
@@ -33,8 +33,27 @@ extern const char* ProgramName;
 }
 
 namespace {
-  int ExecuteProgram(const std::string& name,
-                     const StrVector& args) {
+
+  void PrintString (const std::string& str) {
+    errs() << str << ' ';
+  }
+
+  void PrintCommand (const std::string& Cmd, const StrVector& Args) {
+    errs() << Cmd << ' ';
+    std::for_each(Args.begin(), Args.end(), &PrintString);
+    errs() << '\n';
+  }
+
+  bool IsSegmentationFault (int returnCode) {
+#ifdef LLVM_ON_WIN32
+    return (returnCode >= 0xc0000000UL)
+#else
+    return (returnCode < 0);
+#endif
+  }
+
+  int ExecuteProgram (const std::string& name,
+                      const StrVector& args) {
     sys::Path prog = sys::Program::FindProgramByName(name);
 
     if (prog.isEmpty()) {
@@ -67,24 +86,25 @@ namespace {
     argv.push_back(0);  // null terminate list.
 
     // Invoke the program.
-    return sys::Program::ExecuteAndWait(prog, &argv[0], 0, &redirects[0]);
-  }
+    int ret = sys::Program::ExecuteAndWait(prog, &argv[0], 0, &redirects[0]);
 
-  void print_string (const std::string& str) {
-    errs() << str << ' ';
+    if (IsSegmentationFault(ret)) {
+      errs() << "Segmentation fault: ";
+      PrintCommand(name, args);
+    }
+
+    return ret;
   }
 }
 
 namespace llvmc {
-  void AppendToGlobalTimeLog(const std::string& cmd, double time);
+  void AppendToGlobalTimeLog (const std::string& cmd, double time);
 }
 
-int llvmc::Action::Execute() const {
-  if (DryRun || VerboseMode) {
-    errs() << Command_ << " ";
-    std::for_each(Args_.begin(), Args_.end(), print_string);
-    errs() << '\n';
-  }
+int llvmc::Action::Execute () const {
+  if (DryRun || VerboseMode)
+    PrintCommand(Command_, Args_);
+
   if (!DryRun) {
     if (Time) {
       sys::TimeValue now = sys::TimeValue::now();
diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp
index b17827e..be7f1f5 100644
--- a/lib/ExecutionEngine/ExecutionEngine.cpp
+++ b/lib/ExecutionEngine/ExecutionEngine.cpp
@@ -715,7 +715,7 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
           case Instruction::FDiv: 
             GV.FloatVal = LHS.FloatVal / RHS.FloatVal; break;
           case Instruction::FRem: 
-            GV.FloatVal = ::fmodf(LHS.FloatVal,RHS.FloatVal); break;
+            GV.FloatVal = std::fmod(LHS.FloatVal,RHS.FloatVal); break;
         }
         break;
       case Type::DoubleTyID:
@@ -730,7 +730,7 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
           case Instruction::FDiv: 
             GV.DoubleVal = LHS.DoubleVal / RHS.DoubleVal; break;
           case Instruction::FRem: 
-            GV.DoubleVal = ::fmod(LHS.DoubleVal,RHS.DoubleVal); break;
+            GV.DoubleVal = std::fmod(LHS.DoubleVal,RHS.DoubleVal); break;
         }
         break;
       case Type::X86_FP80TyID:
diff --git a/lib/MC/CMakeLists.txt b/lib/MC/CMakeLists.txt
index dba0e14..5e8a3b6 100644
--- a/lib/MC/CMakeLists.txt
+++ b/lib/MC/CMakeLists.txt
@@ -10,10 +10,13 @@ add_llvm_library(LLVMMC
   MCExpr.cpp
   MCInst.cpp
   MCInstPrinter.cpp
+  MCLabel.cpp
+  MCLoggingStreamer.cpp
   MCMachOStreamer.cpp
   MCNullStreamer.cpp
   MCObjectWriter.cpp
   MCSection.cpp
+  MCSectionCOFF.cpp
   MCSectionELF.cpp
   MCSectionMachO.cpp
   MCStreamer.cpp
diff --git a/lib/MC/MCAsmInfo.cpp b/lib/MC/MCAsmInfo.cpp
index 2b23994..a275be2 100644
--- a/lib/MC/MCAsmInfo.cpp
+++ b/lib/MC/MCAsmInfo.cpp
@@ -21,6 +21,7 @@ using namespace llvm;
 MCAsmInfo::MCAsmInfo() {
   HasSubsectionsViaSymbols = false;
   HasMachoZeroFillDirective = false;
+  HasMachoTBSSDirective = false;
   HasStaticCtorDtorReferenceInStaticMode = false;
   MaxInstLength = 4;
   PCSymbol = "$";
diff --git a/lib/MC/MCAsmInfoDarwin.cpp b/lib/MC/MCAsmInfoDarwin.cpp
index 3c31caa..0bd3b2d 100644
--- a/lib/MC/MCAsmInfoDarwin.cpp
+++ b/lib/MC/MCAsmInfoDarwin.cpp
@@ -35,6 +35,7 @@ MCAsmInfoDarwin::MCAsmInfoDarwin() {
   WeakRefDirective = "\t.weak_reference ";
   ZeroDirective = "\t.space\t";  // ".space N" emits N zeros.
   HasMachoZeroFillDirective = true;  // Uses .zerofill
+  HasMachoTBSSDirective = true; // Uses .tbss
   HasStaticCtorDtorReferenceInStaticMode = true;
   
   HiddenVisibilityAttr = MCSA_PrivateExtern;
diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp
index 2c7e1c4..57b2bcc 100644
--- a/lib/MC/MCAsmStreamer.cpp
+++ b/lib/MC/MCAsmStreamer.cpp
@@ -109,7 +109,10 @@ public:
   virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute);
 
   virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue);
-
+  virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol);
+  virtual void EmitCOFFSymbolStorageClass(int StorageClass);
+  virtual void EmitCOFFSymbolType(int Type);
+  virtual void EndCOFFSymbolDef();
   virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value);
   virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
                                 unsigned ByteAlignment);
@@ -123,6 +126,9 @@ public:
   virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0,
                             unsigned Size = 0, unsigned ByteAlignment = 0);
 
+  virtual void EmitTBSSSymbol (const MCSection *Section, MCSymbol *Symbol,
+                               uint64_t Size, unsigned ByteAlignment = 0);
+                               
   virtual void EmitBytes(StringRef Data, unsigned AddrSpace);
 
   virtual void EmitValue(const MCExpr *Value, unsigned Size,unsigned AddrSpace);
@@ -218,6 +224,7 @@ void MCAsmStreamer::SwitchSection(const MCSection *Section) {
 
 void MCAsmStreamer::EmitLabel(MCSymbol *Symbol) {
   assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
+  assert(!Symbol->isVariable() && "Cannot emit a variable symbol!");
   assert(CurSection && "Cannot emit before setting section!");
 
   OS << *Symbol << ":";
@@ -234,16 +241,11 @@ void MCAsmStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
 }
 
 void MCAsmStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
-  // Only absolute symbols can be redefined.
-  assert((Symbol->isUndefined() || Symbol->isAbsolute()) &&
-         "Cannot define a symbol twice!");
-
   OS << *Symbol << " = " << *Value;
   EmitEOL();
 
   // FIXME: Lift context changes into super class.
-  // FIXME: Set associated section.
-  Symbol->setValue(Value);
+  Symbol->setVariableValue(Value);
 }
 
 void MCAsmStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
@@ -297,6 +299,26 @@ void MCAsmStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {
   EmitEOL();
 }
 
+void MCAsmStreamer::BeginCOFFSymbolDef(const MCSymbol *Symbol) {
+  OS << "\t.def\t " << *Symbol << ';';
+  EmitEOL();
+}
+
+void MCAsmStreamer::EmitCOFFSymbolStorageClass (int StorageClass) {
+  OS << "\t.scl\t" << StorageClass << ';';
+  EmitEOL();
+}
+
+void MCAsmStreamer::EmitCOFFSymbolType (int Type) {
+  OS << "\t.type\t" << Type << ';';
+  EmitEOL();
+}
+
+void MCAsmStreamer::EndCOFFSymbolDef() {
+  OS << "\t.endef";
+  EmitEOL();
+}
+
 void MCAsmStreamer::EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {
   assert(MAI.hasDotTypeDotSizeDirective());
   OS << "\t.size\t" << *Symbol << ", " << *Value << '\n';
@@ -341,6 +363,23 @@ void MCAsmStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol,
   EmitEOL();
 }
 
+// .tbss sym, size, align
+// This depends that the symbol has already been mangled from the original,
+// e.g. _a.
+void MCAsmStreamer::EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
+                                   uint64_t Size, unsigned ByteAlignment) {
+  assert(Symbol != NULL && "Symbol shouldn't be NULL!");
+  // Instead of using the Section we'll just use the shortcut.
+  // This is a mach-o specific directive and section.
+  OS << ".tbss " << *Symbol << ", " << Size;
+  
+  // Output align if we have it.  We default to 1 so don't bother printing
+  // that.
+  if (ByteAlignment > 1) OS << ", " << Log2_32(ByteAlignment);
+  
+  EmitEOL();
+}
+
 static inline char toOctal(int X) { return (X&7)+'0'; }
 
 static void PrintQuotedString(StringRef Data, raw_ostream &OS) {
@@ -630,9 +669,11 @@ void MCAsmStreamer::EmitInstruction(const MCInst &Inst) {
     AddEncodingComment(Inst);
 
   // Show the MCInst if enabled.
-  if (ShowInst)
+  if (ShowInst) {
     Inst.dump_pretty(GetCommentOS(), &MAI, InstPrinter.get(), "\n ");
-  
+    GetCommentOS() << "\n";
+  }
+
   // If we have an AsmPrinter, use that to print, otherwise print the MCInst.
   if (InstPrinter)
     InstPrinter->printInst(&Inst, OS);
diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp
index 69afcc8..5936656 100644
--- a/lib/MC/MCAssembler.cpp
+++ b/lib/MC/MCAssembler.cpp
@@ -47,93 +47,131 @@ STATISTIC(SectionLayouts, "Number of section layouts");
 
 /* *** */
 
-void MCAsmLayout::UpdateForSlide(MCFragment *F, int SlideAmount) {
-  // We shouldn't have to do anything special to support negative slides, and it
-  // is a perfectly valid thing to do as long as other parts of the system are
-  // can guarantee convergence.
-  assert(SlideAmount >= 0 && "Negative slides not yet supported");
+MCAsmLayout::MCAsmLayout(MCAssembler &Asm)
+  : Assembler(Asm), LastValidFragment(0)
+ {
+  // Compute the section layout order. Virtual sections must go last.
+  for (MCAssembler::iterator it = Asm.begin(), ie = Asm.end(); it != ie; ++it)
+    if (!Asm.getBackend().isVirtualSection(it->getSection()))
+      SectionOrder.push_back(&*it);
+  for (MCAssembler::iterator it = Asm.begin(), ie = Asm.end(); it != ie; ++it)
+    if (Asm.getBackend().isVirtualSection(it->getSection()))
+      SectionOrder.push_back(&*it);
+}
 
-  // Update the layout by simply recomputing the layout for the entire
-  // file. This is trivially correct, but very slow.
-  //
-  // FIXME-PERF: This is O(N^2), but will be eliminated once we get smarter.
+bool MCAsmLayout::isSectionUpToDate(const MCSectionData *SD) const {
+  // The first section is always up-to-date.
+  unsigned Index = SD->getLayoutOrder();
+  if (!Index)
+    return true;
 
-  // Layout the concrete sections and fragments.
-  MCAssembler &Asm = getAssembler();
-  uint64_t Address = 0;
-  for (MCAssembler::iterator it = Asm.begin(), ie = Asm.end(); it != ie; ++it) {
-    // Skip virtual sections.
-    if (Asm.getBackend().isVirtualSection(it->getSection()))
-      continue;
+  // Otherwise, sections are always implicitly computed when the preceeding
+  // fragment is layed out.
+  const MCSectionData *Prev = getSectionOrder()[Index - 1];
+  return isFragmentUpToDate(&(Prev->getFragmentList().back()));
+}
+
+bool MCAsmLayout::isFragmentUpToDate(const MCFragment *F) const {
+  return (LastValidFragment &&
+          F->getLayoutOrder() <= LastValidFragment->getLayoutOrder());
+}
 
-    // Layout the section fragments and its size.
-    Address = Asm.LayoutSection(*it, *this, Address);
+void MCAsmLayout::UpdateForSlide(MCFragment *F, int SlideAmount) {
+  // If this fragment wasn't already up-to-date, we don't need to do anything.
+  if (!isFragmentUpToDate(F))
+    return;
+
+  // Otherwise, reset the last valid fragment to the predecessor of the
+  // invalidated fragment.
+  LastValidFragment = F->getPrevNode();
+  if (!LastValidFragment) {
+    unsigned Index = F->getParent()->getLayoutOrder();
+    if (Index != 0) {
+      MCSectionData *Prev = getSectionOrder()[Index - 1];
+      LastValidFragment = &(Prev->getFragmentList().back());
+    }
   }
+}
 
-  // Layout the virtual sections.
-  for (MCAssembler::iterator it = Asm.begin(), ie = Asm.end(); it != ie; ++it) {
-    if (!Asm.getBackend().isVirtualSection(it->getSection()))
-      continue;
+void MCAsmLayout::EnsureValid(const MCFragment *F) const {
+  // Advance the layout position until the fragment is up-to-date.
+  while (!isFragmentUpToDate(F)) {
+    // Advance to the next fragment.
+    MCFragment *Cur = LastValidFragment;
+    if (Cur)
+      Cur = Cur->getNextNode();
+    if (!Cur) {
+      unsigned NextIndex = 0;
+      if (LastValidFragment)
+        NextIndex = LastValidFragment->getParent()->getLayoutOrder() + 1;
+      Cur = SectionOrder[NextIndex]->begin();
+    }
 
-    // Layout the section fragments and its size.
-    Address = Asm.LayoutSection(*it, *this, Address);
+    const_cast<MCAsmLayout*>(this)->LayoutFragment(Cur);
   }
 }
 
+void MCAsmLayout::FragmentReplaced(MCFragment *Src, MCFragment *Dst) {
+  if (LastValidFragment == Src)
+    LastValidFragment = Dst;
+
+  Dst->Offset = Src->Offset;
+  Dst->EffectiveSize = Src->EffectiveSize;
+}
+
 uint64_t MCAsmLayout::getFragmentAddress(const MCFragment *F) const {
   assert(F->getParent() && "Missing section()!");
   return getSectionAddress(F->getParent()) + getFragmentOffset(F);
 }
 
 uint64_t MCAsmLayout::getFragmentEffectiveSize(const MCFragment *F) const {
+  EnsureValid(F);
   assert(F->EffectiveSize != ~UINT64_C(0) && "Address not set!");
   return F->EffectiveSize;
 }
 
-void MCAsmLayout::setFragmentEffectiveSize(MCFragment *F, uint64_t Value) {
-  F->EffectiveSize = Value;
-}
-
 uint64_t MCAsmLayout::getFragmentOffset(const MCFragment *F) const {
+  EnsureValid(F);
   assert(F->Offset != ~UINT64_C(0) && "Address not set!");
   return F->Offset;
 }
 
-void MCAsmLayout::setFragmentOffset(MCFragment *F, uint64_t Value) {
-  F->Offset = Value;
-}
-
 uint64_t MCAsmLayout::getSymbolAddress(const MCSymbolData *SD) const {
   assert(SD->getFragment() && "Invalid getAddress() on undefined symbol!");
   return getFragmentAddress(SD->getFragment()) + SD->getOffset();
 }
 
 uint64_t MCAsmLayout::getSectionAddress(const MCSectionData *SD) const {
+  EnsureValid(SD->begin());
   assert(SD->Address != ~UINT64_C(0) && "Address not set!");
   return SD->Address;
 }
 
-void MCAsmLayout::setSectionAddress(MCSectionData *SD, uint64_t Value) {
-  SD->Address = Value;
-}
-
-uint64_t MCAsmLayout::getSectionSize(const MCSectionData *SD) const {
-  assert(SD->Size != ~UINT64_C(0) && "File size not set!");
-  return SD->Size;
-}
-void MCAsmLayout::setSectionSize(MCSectionData *SD, uint64_t Value) {
-  SD->Size = Value;
+uint64_t MCAsmLayout::getSectionAddressSize(const MCSectionData *SD) const {
+  // The size is the last fragment's end offset.
+  const MCFragment &F = SD->getFragmentList().back();
+  return getFragmentOffset(&F) + getFragmentEffectiveSize(&F);
 }
 
 uint64_t MCAsmLayout::getSectionFileSize(const MCSectionData *SD) const {
-  assert(SD->FileSize != ~UINT64_C(0) && "File size not set!");
-  return SD->FileSize;
-}
-void MCAsmLayout::setSectionFileSize(MCSectionData *SD, uint64_t Value) {
-  SD->FileSize = Value;
+  // Virtual sections have no file size.
+  if (getAssembler().getBackend().isVirtualSection(SD->getSection()))
+    return 0;
+
+  // Otherwise, the file size is the same as the address space size.
+  return getSectionAddressSize(SD);
 }
 
-  /// @}
+uint64_t MCAsmLayout::getSectionSize(const MCSectionData *SD) const {
+  // The logical size is the address space size minus any tail padding.
+  uint64_t Size = getSectionAddressSize(SD);
+  const MCAlignFragment *AF =
+    dyn_cast<MCAlignFragment>(&(SD->getFragmentList().back()));
+  if (AF && AF->hasOnlyAlignAddress())
+    Size -= getFragmentEffectiveSize(AF);
+
+  return Size;
+}
 
 /* *** */
 
@@ -141,17 +179,12 @@ MCFragment::MCFragment() : Kind(FragmentType(~0)) {
 }
 
 MCFragment::MCFragment(FragmentType _Kind, MCSectionData *_Parent)
-  : Kind(_Kind),
-    Parent(_Parent),
-    EffectiveSize(~UINT64_C(0))
+  : Kind(_Kind), Parent(_Parent), Atom(0), EffectiveSize(~UINT64_C(0))
 {
   if (Parent)
     Parent->getFragmentList().push_back(this);
 }
 
-MCFragment::~MCFragment() {
-}
-
 /* *** */
 
 MCSectionData::MCSectionData() : Section(0) {}
@@ -160,8 +193,6 @@ MCSectionData::MCSectionData(const MCSection &_Section, MCAssembler *A)
   : Section(&_Section),
     Alignment(1),
     Address(~UINT64_C(0)),
-    Size(~UINT64_C(0)),
-    FileSize(~UINT64_C(0)),
     HasInstructions(false)
 {
   if (A)
@@ -195,7 +226,7 @@ MCAssembler::~MCAssembler() {
 }
 
 static bool isScatteredFixupFullyResolvedSimple(const MCAssembler &Asm,
-                                                const MCAsmFixup &Fixup,
+                                                const MCFixup &Fixup,
                                                 const MCValue Target,
                                                 const MCSection *BaseSection) {
   // The effective fixup address is
@@ -233,7 +264,7 @@ static bool isScatteredFixupFullyResolvedSimple(const MCAssembler &Asm,
 
 static bool isScatteredFixupFullyResolved(const MCAssembler &Asm,
                                           const MCAsmLayout &Layout,
-                                          const MCAsmFixup &Fixup,
+                                          const MCFixup &Fixup,
                                           const MCValue Target,
                                           const MCSymbolData *BaseSymbol) {
   // The effective fixup address is
@@ -291,36 +322,6 @@ bool MCAssembler::isSymbolLinkerVisible(const MCSymbolData *SD) const {
     SD->getFragment()->getParent()->getSection());
 }
 
-// FIXME-PERF: This routine is really slow.
-const MCSymbolData *MCAssembler::getAtomForAddress(const MCAsmLayout &Layout,
-                                                   const MCSectionData *Section,
-                                                   uint64_t Address) const {
-  const MCSymbolData *Best = 0;
-  uint64_t BestAddress = 0;
-
-  for (MCAssembler::const_symbol_iterator it = symbol_begin(),
-         ie = symbol_end(); it != ie; ++it) {
-    // Ignore non-linker visible symbols.
-    if (!isSymbolLinkerVisible(it))
-      continue;
-
-    // Ignore symbols not in the same section.
-    if (!it->getFragment() || it->getFragment()->getParent() != Section)
-      continue;
-
-    // Otherwise, find the closest symbol preceding this address (ties are
-    // resolved in favor of the last defined symbol).
-    uint64_t SymbolAddress = Layout.getSymbolAddress(it);
-    if (SymbolAddress <= Address && (!Best || SymbolAddress >= BestAddress)) {
-      Best = it;
-      BestAddress = SymbolAddress;
-    }
-  }
-
-  return Best;
-}
-
-// FIXME-PERF: This routine is really slow.
 const MCSymbolData *MCAssembler::getAtom(const MCAsmLayout &Layout,
                                          const MCSymbolData *SD) const {
   // Linker visible symbols define atoms.
@@ -331,17 +332,22 @@ const MCSymbolData *MCAssembler::getAtom(const MCAsmLayout &Layout,
   if (!SD->getFragment())
     return 0;
 
-  // Otherwise, search by address.
-  return getAtomForAddress(Layout, SD->getFragment()->getParent(),
-                           Layout.getSymbolAddress(SD));
+  // Non-linker visible symbols in sections which can't be atomized have no
+  // defining atom.
+  if (!getBackend().isSectionAtomizable(
+        SD->getFragment()->getParent()->getSection()))
+    return 0;
+
+  // Otherwise, return the atom for the containing fragment.
+  return SD->getFragment()->getAtom();
 }
 
 bool MCAssembler::EvaluateFixup(const MCAsmLayout &Layout,
-                                const MCAsmFixup &Fixup, const MCFragment *DF,
+                                const MCFixup &Fixup, const MCFragment *DF,
                                 MCValue &Target, uint64_t &Value) const {
   ++stats::EvaluateFixup;
 
-  if (!Fixup.Value->EvaluateAsRelocatable(Target, &Layout))
+  if (!Fixup.getValue()->EvaluateAsRelocatable(Target, &Layout))
     report_fatal_error("expected relocatable expression");
 
   // FIXME: How do non-scattered symbols work in ELF? I presume the linker
@@ -350,8 +356,8 @@ bool MCAssembler::EvaluateFixup(const MCAsmLayout &Layout,
 
   Value = Target.getConstant();
 
-  bool IsPCRel =
-    Emitter.getFixupKindInfo(Fixup.Kind).Flags & MCFixupKindInfo::FKF_IsPCRel;
+  bool IsPCRel = Emitter.getFixupKindInfo(
+    Fixup.getKind()).Flags & MCFixupKindInfo::FKF_IsPCRel;
   bool IsResolved = true;
   if (const MCSymbolRefExpr *A = Target.getSymA()) {
     if (A->getSymbol().isDefined())
@@ -374,8 +380,7 @@ bool MCAssembler::EvaluateFixup(const MCAsmLayout &Layout,
       // symbol) that the fixup value is relative to.
       const MCSymbolData *BaseSymbol = 0;
       if (IsPCRel) {
-        BaseSymbol = getAtomForAddress(
-          Layout, DF->getParent(), Layout.getFragmentAddress(DF)+Fixup.Offset);
+        BaseSymbol = DF->getAtom();
         if (!BaseSymbol)
           IsResolved = false;
       }
@@ -394,117 +399,123 @@ bool MCAssembler::EvaluateFixup(const MCAsmLayout &Layout,
   }
 
   if (IsPCRel)
-    Value -= Layout.getFragmentAddress(DF) + Fixup.Offset;
+    Value -= Layout.getFragmentAddress(DF) + Fixup.getOffset();
 
   return IsResolved;
 }
 
-uint64_t MCAssembler::LayoutSection(MCSectionData &SD,
-                                    MCAsmLayout &Layout,
-                                    uint64_t StartAddress) {
-  bool IsVirtual = getBackend().isVirtualSection(SD.getSection());
-
-  ++stats::SectionLayouts;
-
-  // Align this section if necessary by adding padding bytes to the previous
-  // section. It is safe to adjust this out-of-band, because no symbol or
-  // fragment is allowed to point past the end of the section at any time.
-  if (uint64_t Pad = OffsetToAlignment(StartAddress, SD.getAlignment())) {
-    // Unless this section is virtual (where we are allowed to adjust the offset
-    // freely), the padding goes in the previous section.
-    if (!IsVirtual) {
-      // Find the previous non-virtual section.
-      iterator it = &SD;
-      assert(it != begin() && "Invalid initial section address!");
-      for (--it; getBackend().isVirtualSection(it->getSection()); --it) ;
-      Layout.setSectionFileSize(&*it, Layout.getSectionFileSize(&*it) + Pad);
-    }
+uint64_t MCAssembler::ComputeFragmentSize(MCAsmLayout &Layout,
+                                          const MCFragment &F,
+                                          uint64_t SectionAddress,
+                                          uint64_t FragmentOffset) const {
+  switch (F.getKind()) {
+  case MCFragment::FT_Data:
+    return cast<MCDataFragment>(F).getContents().size();
+  case MCFragment::FT_Fill:
+    return cast<MCFillFragment>(F).getSize();
+  case MCFragment::FT_Inst:
+    return cast<MCInstFragment>(F).getInstSize();
 
-    StartAddress += Pad;
-  }
+  case MCFragment::FT_Align: {
+    const MCAlignFragment &AF = cast<MCAlignFragment>(F);
 
-  // Set the aligned section address.
-  Layout.setSectionAddress(&SD, StartAddress);
+    assert((!AF.hasOnlyAlignAddress() || !AF.getNextNode()) &&
+           "Invalid OnlyAlignAddress bit, not the last fragment!");
 
-  uint64_t Address = StartAddress;
-  for (MCSectionData::iterator it = SD.begin(), ie = SD.end(); it != ie; ++it) {
-    MCFragment &F = *it;
+    uint64_t Size = OffsetToAlignment(SectionAddress + FragmentOffset,
+                                      AF.getAlignment());
 
-    ++stats::FragmentLayouts;
+    // Honor MaxBytesToEmit.
+    if (Size > AF.getMaxBytesToEmit())
+      return 0;
 
-    uint64_t FragmentOffset = Address - StartAddress;
-    Layout.setFragmentOffset(&F, FragmentOffset);
+    return Size;
+  }
 
-    // Evaluate fragment size.
-    uint64_t EffectiveSize = 0;
-    switch (F.getKind()) {
-    case MCFragment::FT_Align: {
-      MCAlignFragment &AF = cast<MCAlignFragment>(F);
+  case MCFragment::FT_Org: {
+    const MCOrgFragment &OF = cast<MCOrgFragment>(F);
 
-      EffectiveSize = OffsetToAlignment(Address, AF.getAlignment());
-      if (EffectiveSize > AF.getMaxBytesToEmit())
-        EffectiveSize = 0;
-      break;
-    }
+    // FIXME: We should compute this sooner, we don't want to recurse here, and
+    // we would like to be more functional.
+    int64_t TargetLocation;
+    if (!OF.getOffset().EvaluateAsAbsolute(TargetLocation, &Layout))
+      report_fatal_error("expected assembly-time absolute expression");
 
-    case MCFragment::FT_Data:
-      EffectiveSize = cast<MCDataFragment>(F).getContents().size();
-      break;
+    // FIXME: We need a way to communicate this error.
+    int64_t Offset = TargetLocation - FragmentOffset;
+    if (Offset < 0)
+      report_fatal_error("invalid .org offset '" + Twine(TargetLocation) +
+                         "' (at offset '" + Twine(FragmentOffset) + "'");
 
-    case MCFragment::FT_Fill: {
-      MCFillFragment &FF = cast<MCFillFragment>(F);
-      EffectiveSize = FF.getValueSize() * FF.getCount();
-      break;
-    }
+    return Offset;
+  }
+  }
 
-    case MCFragment::FT_Inst:
-      EffectiveSize = cast<MCInstFragment>(F).getInstSize();
-      break;
+  assert(0 && "invalid fragment kind");
+  return 0;
+}
 
-    case MCFragment::FT_Org: {
-      MCOrgFragment &OF = cast<MCOrgFragment>(F);
+void MCAsmLayout::LayoutFile() {
+  // Initialize the first section and set the valid fragment layout point. All
+  // actual layout computations are done lazily.
+  LastValidFragment = 0;
+  if (!getSectionOrder().empty())
+    getSectionOrder().front()->Address = 0;
+}
 
-      int64_t TargetLocation;
-      if (!OF.getOffset().EvaluateAsAbsolute(TargetLocation, &Layout))
-        report_fatal_error("expected assembly-time absolute expression");
+void MCAsmLayout::LayoutFragment(MCFragment *F) {
+  MCFragment *Prev = F->getPrevNode();
 
-      // FIXME: We need a way to communicate this error.
-      int64_t Offset = TargetLocation - FragmentOffset;
-      if (Offset < 0)
-        report_fatal_error("invalid .org offset '" + Twine(TargetLocation) +
-                          "' (at offset '" + Twine(FragmentOffset) + "'");
+  // We should never try to recompute something which is up-to-date.
+  assert(!isFragmentUpToDate(F) && "Attempt to recompute up-to-date fragment!");
+  // We should never try to compute the fragment layout if the section isn't
+  // up-to-date.
+  assert(isSectionUpToDate(F->getParent()) &&
+         "Attempt to compute fragment before it's section!");
+  // We should never try to compute the fragment layout if it's predecessor
+  // isn't up-to-date.
+  assert((!Prev || isFragmentUpToDate(Prev)) &&
+         "Attempt to compute fragment before it's predecessor!");
 
-      EffectiveSize = Offset;
-      break;
-    }
+  ++stats::FragmentLayouts;
 
-    case MCFragment::FT_ZeroFill: {
-      MCZeroFillFragment &ZFF = cast<MCZeroFillFragment>(F);
+  // Compute the fragment start address.
+  uint64_t StartAddress = F->getParent()->Address;
+  uint64_t Address = StartAddress;
+  if (Prev)
+    Address += Prev->Offset + Prev->EffectiveSize;
+
+  // Compute fragment offset and size.
+  F->Offset = Address - StartAddress;
+  F->EffectiveSize = getAssembler().ComputeFragmentSize(*this, *F, StartAddress,
+                                                        F->Offset);
+  LastValidFragment = F;
+
+  // If this is the last fragment in a section, update the next section address.
+  if (!F->getNextNode()) {
+    unsigned NextIndex = F->getParent()->getLayoutOrder() + 1;
+    if (NextIndex != getSectionOrder().size())
+      LayoutSection(getSectionOrder()[NextIndex]);
+  }
+}
 
-      // Align the fragment offset; it is safe to adjust the offset freely since
-      // this is only in virtual sections.
-      //
-      // FIXME: We shouldn't be doing this here.
-      Address = RoundUpToAlignment(Address, ZFF.getAlignment());
-      Layout.setFragmentOffset(&F, Address - StartAddress);
+void MCAsmLayout::LayoutSection(MCSectionData *SD) {
+  unsigned SectionOrderIndex = SD->getLayoutOrder();
 
-      EffectiveSize = ZFF.getSize();
-      break;
-    }
-    }
+  ++stats::SectionLayouts;
 
-    Layout.setFragmentEffectiveSize(&F, EffectiveSize);
-    Address += EffectiveSize;
+  // Compute the section start address.
+  uint64_t StartAddress = 0;
+  if (SectionOrderIndex) {
+    MCSectionData *Prev = getSectionOrder()[SectionOrderIndex - 1];
+    StartAddress = getSectionAddress(Prev) + getSectionAddressSize(Prev);
   }
 
-  // Set the section sizes.
-  Layout.setSectionSize(&SD, Address - StartAddress);
-  if (IsVirtual)
-    Layout.setSectionFileSize(&SD, 0);
-  else
-    Layout.setSectionFileSize(&SD, Address - StartAddress);
+  // Honor the section alignment requirements.
+  StartAddress = RoundUpToAlignment(StartAddress, SD->getAlignment());
 
-  return Address;
+  // Set the section address.
+  SD->Address = StartAddress;
 }
 
 /// WriteFragmentData - Write the \arg F data to the output file.
@@ -522,6 +533,8 @@ static void WriteFragmentData(const MCAssembler &Asm, const MCAsmLayout &Layout,
     MCAlignFragment &AF = cast<MCAlignFragment>(F);
     uint64_t Count = FragmentSize / AF.getValueSize();
 
+    assert(AF.getValueSize() && "Invalid virtual align in concrete fragment!");
+
     // FIXME: This error shouldn't actually occur (the front end should emit
     // multiple .align directives to enforce the semantics it wants), but is
     // severe enough that we want to report it. How to handle this?
@@ -535,7 +548,7 @@ static void WriteFragmentData(const MCAssembler &Asm, const MCAsmLayout &Layout,
     // the Count bytes.  Then if that did not fill any bytes or there are any
     // bytes left to fill use the the Value and ValueSize to fill the rest.
     // If we are aligning with nops, ask that target to emit the right data.
-    if (AF.getEmitNops()) {
+    if (AF.hasEmitNops()) {
       if (!Asm.getBackend().WriteNopData(Count, OW))
         report_fatal_error("unable to write nop sequence of " +
                           Twine(Count) + " bytes");
@@ -565,7 +578,10 @@ static void WriteFragmentData(const MCAssembler &Asm, const MCAsmLayout &Layout,
 
   case MCFragment::FT_Fill: {
     MCFillFragment &FF = cast<MCFillFragment>(F);
-    for (uint64_t i = 0, e = FF.getCount(); i != e; ++i) {
+
+    assert(FF.getValueSize() && "Invalid virtual align in concrete fragment!");
+
+    for (uint64_t i = 0, e = FF.getSize() / FF.getValueSize(); i != e; ++i) {
       switch (FF.getValueSize()) {
       default:
         assert(0 && "Invalid size!");
@@ -590,11 +606,6 @@ static void WriteFragmentData(const MCAssembler &Asm, const MCAsmLayout &Layout,
 
     break;
   }
-
-  case MCFragment::FT_ZeroFill: {
-    assert(0 && "Invalid zero fill fragment in concrete section!");
-    break;
-  }
   }
 
   assert(OW->getStream().tell() - Start == FragmentSize);
@@ -603,12 +614,27 @@ static void WriteFragmentData(const MCAssembler &Asm, const MCAsmLayout &Layout,
 void MCAssembler::WriteSectionData(const MCSectionData *SD,
                                    const MCAsmLayout &Layout,
                                    MCObjectWriter *OW) const {
-  uint64_t SectionSize = Layout.getSectionSize(SD);
-  uint64_t SectionFileSize = Layout.getSectionFileSize(SD);
-
   // Ignore virtual sections.
   if (getBackend().isVirtualSection(SD->getSection())) {
-    assert(SectionFileSize == 0 && "Invalid size for section!");
+    assert(Layout.getSectionFileSize(SD) == 0 && "Invalid size for section!");
+
+    // Check that contents are only things legal inside a virtual section.
+    for (MCSectionData::const_iterator it = SD->begin(),
+           ie = SD->end(); it != ie; ++it) {
+      switch (it->getKind()) {
+      default:
+        assert(0 && "Invalid fragment in virtual section!");
+      case MCFragment::FT_Align:
+        assert(!cast<MCAlignFragment>(it)->getValueSize() &&
+               "Invalid align in virtual section!");
+        break;
+      case MCFragment::FT_Fill:
+        assert(!cast<MCFillFragment>(it)->getValueSize() &&
+               "Invalid fill in virtual section!");
+        break;
+      }
+    }
+
     return;
   }
 
@@ -619,11 +645,7 @@ void MCAssembler::WriteSectionData(const MCSectionData *SD,
          ie = SD->end(); it != ie; ++it)
     WriteFragmentData(*this, Layout, *it, OW);
 
-  // Add section padding.
-  assert(SectionFileSize >= SectionSize && "Invalid section sizes!");
-  OW->WriteZeros(SectionFileSize - SectionSize);
-
-  assert(OW->getStream().tell() - Start == SectionFileSize);
+  assert(OW->getStream().tell() - Start == Layout.getSectionFileSize(SD));
 }
 
 void MCAssembler::Finish() {
@@ -631,20 +653,60 @@ void MCAssembler::Finish() {
       llvm::errs() << "assembler backend - pre-layout\n--\n";
       dump(); });
 
-  // Assign section and fragment ordinals, all subsequent backend code is
-  // responsible for updating these in place.
+  // Create the layout object.
+  MCAsmLayout Layout(*this);
+
+  // Insert additional align fragments for concrete sections to explicitly pad
+  // the previous section to match their alignment requirements. This is for
+  // 'gas' compatibility, it shouldn't strictly be necessary.
+  //
+  // FIXME: This may be Mach-O specific.
+  for (unsigned i = 1, e = Layout.getSectionOrder().size(); i < e; ++i) {
+    MCSectionData *SD = Layout.getSectionOrder()[i];
+
+    // Ignore sections without alignment requirements.
+    unsigned Align = SD->getAlignment();
+    if (Align <= 1)
+      continue;
+
+    // Ignore virtual sections, they don't cause file size modifications.
+    if (getBackend().isVirtualSection(SD->getSection()))
+      continue;
+
+    // Otherwise, create a new align fragment at the end of the previous
+    // section.
+    MCAlignFragment *AF = new MCAlignFragment(Align, 0, 1, Align,
+                                              Layout.getSectionOrder()[i - 1]);
+    AF->setOnlyAlignAddress(true);
+  }
+
+  // Create dummy fragments and assign section ordinals.
   unsigned SectionIndex = 0;
-  unsigned FragmentIndex = 0;
   for (MCAssembler::iterator it = begin(), ie = end(); it != ie; ++it) {
+    // Create dummy fragments to eliminate any empty sections, this simplifies
+    // layout.
+    if (it->getFragmentList().empty()) {
+      unsigned ValueSize = 1;
+      if (getBackend().isVirtualSection(it->getSection()))
+        ValueSize = 1;
+      new MCFillFragment(0, 1, 0, it);
+    }
+
     it->setOrdinal(SectionIndex++);
+  }
 
-    for (MCSectionData::iterator it2 = it->begin(),
-           ie2 = it->end(); it2 != ie2; ++it2)
-      it2->setOrdinal(FragmentIndex++);
+  // Assign layout order indices to sections and fragments.
+  unsigned FragmentIndex = 0;
+  for (unsigned i = 0, e = Layout.getSectionOrder().size(); i != e; ++i) {
+    MCSectionData *SD = Layout.getSectionOrder()[i];
+    SD->setLayoutOrder(i);
+
+    for (MCSectionData::iterator it2 = SD->begin(),
+           ie2 = SD->end(); it2 != ie2; ++it2)
+      it2->setLayoutOrder(FragmentIndex++);
   }
 
   // Layout until everything fits.
-  MCAsmLayout Layout(*this);
   while (LayoutOnce(Layout))
     continue;
 
@@ -678,7 +740,7 @@ void MCAssembler::Finish() {
 
       for (MCDataFragment::fixup_iterator it3 = DF->fixup_begin(),
              ie3 = DF->fixup_end(); it3 != ie3; ++it3) {
-        MCAsmFixup &Fixup = *it3;
+        MCFixup &Fixup = *it3;
 
         // Evaluate the fixup.
         MCValue Target;
@@ -702,7 +764,7 @@ void MCAssembler::Finish() {
   stats::ObjectBytes += OS.tell() - StartOffset;
 }
 
-bool MCAssembler::FixupNeedsRelaxation(const MCAsmFixup &Fixup,
+bool MCAssembler::FixupNeedsRelaxation(const MCFixup &Fixup,
                                        const MCFragment *DF,
                                        const MCAsmLayout &Layout) const {
   if (getRelaxAll())
@@ -725,7 +787,7 @@ bool MCAssembler::FragmentNeedsRelaxation(const MCInstFragment *IF,
   // If this inst doesn't ever need relaxation, ignore it. This occurs when we
   // are intentionally pushing out inst fragments, or because we relaxed a
   // previous instruction to one that doesn't need relaxation.
-  if (!getBackend().MayNeedRelaxation(IF->getInst(), IF->getFixups()))
+  if (!getBackend().MayNeedRelaxation(IF->getInst()))
     return false;
 
   for (MCInstFragment::const_fixup_iterator it = IF->fixup_begin(),
@@ -739,25 +801,8 @@ bool MCAssembler::FragmentNeedsRelaxation(const MCInstFragment *IF,
 bool MCAssembler::LayoutOnce(MCAsmLayout &Layout) {
   ++stats::RelaxationSteps;
 
-  // Layout the concrete sections and fragments.
-  uint64_t Address = 0;
-  for (iterator it = begin(), ie = end(); it != ie; ++it) {
-    // Skip virtual sections.
-    if (getBackend().isVirtualSection(it->getSection()))
-      continue;
-
-    // Layout the section fragments and its size.
-    Address = LayoutSection(*it, Layout, Address);
-  }
-
-  // Layout the virtual sections.
-  for (iterator it = begin(), ie = end(); it != ie; ++it) {
-    if (!getBackend().isVirtualSection(it->getSection()))
-      continue;
-
-    // Layout the section fragments and its size.
-    Address = LayoutSection(*it, Layout, Address);
-  }
+  // Layout the sections in order.
+  Layout.LayoutFile();
 
   // Scan for fragments that need relaxation.
   bool WasRelaxed = false;
@@ -779,7 +824,7 @@ bool MCAssembler::LayoutOnce(MCAsmLayout &Layout) {
       // Relax the fragment.
 
       MCInst Relaxed;
-      getBackend().RelaxInstruction(IF, Relaxed);
+      getBackend().RelaxInstruction(IF->getInst(), Relaxed);
 
       // Encode the new instruction.
       //
@@ -796,17 +841,12 @@ bool MCAssembler::LayoutOnce(MCAsmLayout &Layout) {
       IF->setInst(Relaxed);
       IF->getCode() = Code;
       IF->getFixups().clear();
-      for (unsigned i = 0, e = Fixups.size(); i != e; ++i) {
-        MCFixup &F = Fixups[i];
-        IF->getFixups().push_back(MCAsmFixup(F.getOffset(), *F.getValue(),
-                                             F.getKind()));
-      }
+      // FIXME: Eliminate copy.
+      for (unsigned i = 0, e = Fixups.size(); i != e; ++i)
+        IF->getFixups().push_back(Fixups[i]);
 
-      // Update the layout, and remember that we relaxed. If we are relaxing
-      // everything, we can skip this step since nothing will depend on updating
-      // the values.
-      if (!getRelaxAll())
-        Layout.UpdateForSlide(IF, SlideAmount);
+      // Update the layout, and remember that we relaxed.
+      Layout.UpdateForSlide(IF, SlideAmount);
       WasRelaxed = true;
     }
   }
@@ -838,12 +878,10 @@ void MCAssembler::FinishLayout(MCAsmLayout &Layout) {
       SD.getFragmentList().insert(it2, DF);
 
       // Update the data fragments layout data.
-      //
-      // FIXME: Add MCAsmLayout utility for this.
       DF->setParent(IF->getParent());
-      DF->setOrdinal(IF->getOrdinal());
-      Layout.setFragmentOffset(DF, Layout.getFragmentOffset(IF));
-      Layout.setFragmentEffectiveSize(DF, Layout.getFragmentEffectiveSize(IF));
+      DF->setAtom(IF->getAtom());
+      DF->setLayoutOrder(IF->getLayoutOrder());
+      Layout.FragmentReplaced(IF, DF);
 
       // Copy in the data and the fixups.
       DF->getContents().append(IF->getCode().begin(), IF->getCode().end());
@@ -861,9 +899,10 @@ void MCAssembler::FinishLayout(MCAsmLayout &Layout) {
 
 namespace llvm {
 
-raw_ostream &operator<<(raw_ostream &OS, const MCAsmFixup &AF) {
-  OS << "<MCAsmFixup" << " Offset:" << AF.Offset << " Value:" << *AF.Value
-     << " Kind:" << AF.Kind << ">";
+raw_ostream &operator<<(raw_ostream &OS, const MCFixup &AF) {
+  OS << "<MCFixup" << " Offset:" << AF.getOffset()
+     << " Value:" << *AF.getValue()
+     << " Kind:" << AF.getKind() << ">";
   return OS;
 }
 
@@ -872,94 +911,82 @@ raw_ostream &operator<<(raw_ostream &OS, const MCAsmFixup &AF) {
 void MCFragment::dump() {
   raw_ostream &OS = llvm::errs();
 
-  OS << "<MCFragment " << (void*) this << " Offset:" << Offset
-     << " EffectiveSize:" << EffectiveSize;
-
-  OS << ">";
-}
-
-void MCAlignFragment::dump() {
-  raw_ostream &OS = llvm::errs();
+  OS << "<";
+  switch (getKind()) {
+  case MCFragment::FT_Align: OS << "MCAlignFragment"; break;
+  case MCFragment::FT_Data:  OS << "MCDataFragment"; break;
+  case MCFragment::FT_Fill:  OS << "MCFillFragment"; break;
+  case MCFragment::FT_Inst:  OS << "MCInstFragment"; break;
+  case MCFragment::FT_Org:   OS << "MCOrgFragment"; break;
+  }
 
-  OS << "<MCAlignFragment ";
-  this->MCFragment::dump();
-  OS << "\n       ";
-  OS << " Alignment:" << getAlignment()
-     << " Value:" << getValue() << " ValueSize:" << getValueSize()
-     << " MaxBytesToEmit:" << getMaxBytesToEmit() << ">";
-}
+  OS << "<MCFragment " << (void*) this << " LayoutOrder:" << LayoutOrder
+     << " Offset:" << Offset << " EffectiveSize:" << EffectiveSize << ">";
 
-void MCDataFragment::dump() {
-  raw_ostream &OS = llvm::errs();
-
-  OS << "<MCDataFragment ";
-  this->MCFragment::dump();
-  OS << "\n       ";
-  OS << " Contents:[";
-  for (unsigned i = 0, e = getContents().size(); i != e; ++i) {
-    if (i) OS << ",";
-    OS << hexdigit((Contents[i] >> 4) & 0xF) << hexdigit(Contents[i] & 0xF);
-  }
-  OS << "] (" << getContents().size() << " bytes)";
-
-  if (!getFixups().empty()) {
-    OS << ",\n       ";
-    OS << " Fixups:[";
-    for (fixup_iterator it = fixup_begin(), ie = fixup_end(); it != ie; ++it) {
-      if (it != fixup_begin()) OS << ",\n                ";
-      OS << *it;
+  switch (getKind()) {
+  case MCFragment::FT_Align: {
+    const MCAlignFragment *AF = cast<MCAlignFragment>(this);
+    if (AF->hasEmitNops())
+      OS << " (emit nops)";
+    if (AF->hasOnlyAlignAddress())
+      OS << " (only align section)";
+    OS << "\n       ";
+    OS << " Alignment:" << AF->getAlignment()
+       << " Value:" << AF->getValue() << " ValueSize:" << AF->getValueSize()
+       << " MaxBytesToEmit:" << AF->getMaxBytesToEmit() << ">";
+    break;
+  }
+  case MCFragment::FT_Data:  {
+    const MCDataFragment *DF = cast<MCDataFragment>(this);
+    OS << "\n       ";
+    OS << " Contents:[";
+    const SmallVectorImpl<char> &Contents = DF->getContents();
+    for (unsigned i = 0, e = Contents.size(); i != e; ++i) {
+      if (i) OS << ",";
+      OS << hexdigit((Contents[i] >> 4) & 0xF) << hexdigit(Contents[i] & 0xF);
     }
-    OS << "]";
+    OS << "] (" << Contents.size() << " bytes)";
+
+    if (!DF->getFixups().empty()) {
+      OS << ",\n       ";
+      OS << " Fixups:[";
+      for (MCDataFragment::const_fixup_iterator it = DF->fixup_begin(),
+             ie = DF->fixup_end(); it != ie; ++it) {
+        if (it != DF->fixup_begin()) OS << ",\n                ";
+        OS << *it;
+      }
+      OS << "]";
+    }
+    break;
+  }
+  case MCFragment::FT_Fill:  {
+    const MCFillFragment *FF = cast<MCFillFragment>(this);
+    OS << " Value:" << FF->getValue() << " ValueSize:" << FF->getValueSize()
+       << " Size:" << FF->getSize();
+    break;
+  }
+  case MCFragment::FT_Inst:  {
+    const MCInstFragment *IF = cast<MCInstFragment>(this);
+    OS << "\n       ";
+    OS << " Inst:";
+    IF->getInst().dump_pretty(OS);
+    break;
+  }
+  case MCFragment::FT_Org:  {
+    const MCOrgFragment *OF = cast<MCOrgFragment>(this);
+    OS << "\n       ";
+    OS << " Offset:" << OF->getOffset() << " Value:" << OF->getValue();
+    break;
+  }
   }
-
-  OS << ">";
-}
-
-void MCFillFragment::dump() {
-  raw_ostream &OS = llvm::errs();
-
-  OS << "<MCFillFragment ";
-  this->MCFragment::dump();
-  OS << "\n       ";
-  OS << " Value:" << getValue() << " ValueSize:" << getValueSize()
-     << " Count:" << getCount() << ">";
-}
-
-void MCInstFragment::dump() {
-  raw_ostream &OS = llvm::errs();
-
-  OS << "<MCInstFragment ";
-  this->MCFragment::dump();
-  OS << "\n       ";
-  OS << " Inst:";
-  getInst().dump_pretty(OS);
   OS << ">";
 }
 
-void MCOrgFragment::dump() {
-  raw_ostream &OS = llvm::errs();
-
-  OS << "<MCOrgFragment ";
-  this->MCFragment::dump();
-  OS << "\n       ";
-  OS << " Offset:" << getOffset() << " Value:" << getValue() << ">";
-}
-
-void MCZeroFillFragment::dump() {
-  raw_ostream &OS = llvm::errs();
-
-  OS << "<MCZeroFillFragment ";
-  this->MCFragment::dump();
-  OS << "\n       ";
-  OS << " Size:" << getSize() << " Alignment:" << getAlignment() << ">";
-}
-
 void MCSectionData::dump() {
   raw_ostream &OS = llvm::errs();
 
   OS << "<MCSectionData";
   OS << " Alignment:" << getAlignment() << " Address:" << Address
-     << " Size:" << Size << " FileSize:" << FileSize
      << " Fragments:[\n      ";
   for (iterator it = begin(), ie = end(); it != ie; ++it) {
     if (it != begin()) OS << ",\n      ";
diff --git a/lib/MC/MCContext.cpp b/lib/MC/MCContext.cpp
index dc757bb..53ffc94 100644
--- a/lib/MC/MCContext.cpp
+++ b/lib/MC/MCContext.cpp
@@ -11,18 +11,22 @@
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCSectionMachO.h"
 #include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSectionCOFF.h"
 #include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCLabel.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/Twine.h"
 using namespace llvm;
 
 typedef StringMap<const MCSectionMachO*> MachOUniqueMapTy;
 typedef StringMap<const MCSectionELF*> ELFUniqueMapTy;
+typedef StringMap<const MCSectionCOFF*> COFFUniqueMapTy;
 
 
 MCContext::MCContext(const MCAsmInfo &mai) : MAI(mai), NextUniqueID(0) {
   MachOUniquingMap = 0;
   ELFUniquingMap = 0;
+  COFFUniquingMap = 0;
 }
 
 MCContext::~MCContext() {
@@ -32,6 +36,7 @@ MCContext::~MCContext() {
   // If we have the MachO uniquing map, free it.
   delete (MachOUniqueMapTy*)MachOUniquingMap;
   delete (ELFUniqueMapTy*)ELFUniquingMap;
+  delete (COFFUniqueMapTy*)COFFUniquingMap;
 }
 
 //===----------------------------------------------------------------------===//
@@ -67,6 +72,34 @@ MCSymbol *MCContext::CreateTempSymbol() {
                            "tmp" + Twine(NextUniqueID++));
 }
 
+unsigned MCContext::NextInstance(int64_t LocalLabelVal) {
+  MCLabel *&Label = Instances[LocalLabelVal];
+  if (!Label)
+    Label = new (*this) MCLabel(0);
+  return Label->incInstance();
+}
+
+unsigned MCContext::GetInstance(int64_t LocalLabelVal) {
+  MCLabel *&Label = Instances[LocalLabelVal];
+  if (!Label)
+    Label = new (*this) MCLabel(0);
+  return Label->getInstance();
+}
+
+MCSymbol *MCContext::CreateDirectionalLocalSymbol(int64_t LocalLabelVal) {
+  return GetOrCreateSymbol(Twine(MAI.getPrivateGlobalPrefix()) +
+                           Twine(LocalLabelVal) +
+                           "\2" +
+			   Twine(NextInstance(LocalLabelVal)));
+}
+MCSymbol *MCContext::GetDirectionalLocalSymbol(int64_t LocalLabelVal,
+                                               int bORf) {
+  return GetOrCreateSymbol(Twine(MAI.getPrivateGlobalPrefix()) +
+                           Twine(LocalLabelVal) +
+                           "\2" +
+			   Twine(GetInstance(LocalLabelVal) + bORf));
+}
+
 MCSymbol *MCContext::LookupSymbol(StringRef Name) const {
   return Symbols.lookup(Name);
 }
@@ -122,4 +155,22 @@ getELFSection(StringRef Section, unsigned Type, unsigned Flags,
   return Result;
 }
 
-
+const MCSection *MCContext::getCOFFSection(StringRef Section,
+                                           unsigned Characteristics,
+                                           int Selection,
+                                           SectionKind Kind) {
+  if (COFFUniquingMap == 0)
+    COFFUniquingMap = new COFFUniqueMapTy();
+  COFFUniqueMapTy &Map = *(COFFUniqueMapTy*)COFFUniquingMap;
+  
+  // Do the lookup, if we have a hit, return it.
+  StringMapEntry<const MCSectionCOFF*> &Entry = Map.GetOrCreateValue(Section);
+  if (Entry.getValue()) return Entry.getValue();
+  
+  MCSectionCOFF *Result = new (*this) MCSectionCOFF(Entry.getKey(),
+                                                    Characteristics,
+                                                    Selection, Kind);
+  
+  Entry.setValue(Result);
+  return Result;
+}
diff --git a/lib/MC/MCExpr.cpp b/lib/MC/MCExpr.cpp
index bc670ab..c000dd7 100644
--- a/lib/MC/MCExpr.cpp
+++ b/lib/MC/MCExpr.cpp
@@ -39,6 +39,10 @@ void MCExpr::print(raw_ostream &OS) const {
     const MCSymbolRefExpr &SRE = cast<MCSymbolRefExpr>(*this);
     const MCSymbol &Sym = SRE.getSymbol();
 
+    if (SRE.getKind() == MCSymbolRefExpr::VK_ARM_HI16 ||
+	SRE.getKind() == MCSymbolRefExpr::VK_ARM_LO16)
+      OS << MCSymbolRefExpr::getVariantKindName(SRE.getKind());
+
     // Parenthesize names that start with $ so that they don't look like
     // absolute names.
     if (Sym.getName()[0] == '$')
@@ -46,7 +50,9 @@ void MCExpr::print(raw_ostream &OS) const {
     else
       OS << Sym;
 
-    if (SRE.getKind() != MCSymbolRefExpr::VK_None)
+    if (SRE.getKind() != MCSymbolRefExpr::VK_None &&
+	SRE.getKind() != MCSymbolRefExpr::VK_ARM_HI16 &&
+	SRE.getKind() != MCSymbolRefExpr::VK_ARM_LO16)
       OS << '@' << MCSymbolRefExpr::getVariantKindName(SRE.getKind());
 
     return;
@@ -169,6 +175,9 @@ StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) {
   case VK_PLT: return "PLT";
   case VK_TLSGD: return "TLSGD";
   case VK_TPOFF: return "TPOFF";
+  case VK_ARM_HI16: return ":upper16:";
+  case VK_ARM_LO16: return ":lower16:";
+  case VK_TLVP: return "TLVP";
   }
 }
 
@@ -184,6 +193,7 @@ MCSymbolRefExpr::getVariantKindForName(StringRef Name) {
     .Case("PLT", VK_PLT)
     .Case("TLSGD", VK_TLSGD)
     .Case("TPOFF", VK_TPOFF)
+    .Case("TLVP", VK_TLVP)
     .Default(VK_Invalid);
 }
 
@@ -249,7 +259,7 @@ bool MCExpr::EvaluateAsRelocatable(MCValue &Res,
 
     // Evaluate recursively if this is a variable.
     if (Sym.isVariable()) {
-      if (!Sym.getValue()->EvaluateAsRelocatable(Res, Layout))
+      if (!Sym.getVariableValue()->EvaluateAsRelocatable(Res, Layout))
         return false;
 
       // Absolutize symbol differences between defined symbols when we have a
diff --git a/lib/MC/MCInst.cpp b/lib/MC/MCInst.cpp
index de142dc..4cb628b 100644
--- a/lib/MC/MCInst.cpp
+++ b/lib/MC/MCInst.cpp
@@ -57,7 +57,7 @@ void MCInst::dump_pretty(raw_ostream &OS, const MCAsmInfo *MAI,
     OS << Separator;
     getOperand(i).print(OS, MAI);
   }
-  OS << ">\n";
+  OS << ">";
 }
 
 void MCInst::dump() const {
diff --git a/lib/MC/MCLabel.cpp b/lib/MC/MCLabel.cpp
new file mode 100644
index 0000000..9c0fc92
--- /dev/null
+++ b/lib/MC/MCLabel.cpp
@@ -0,0 +1,21 @@
+//===- lib/MC/MCLabel.cpp - MCLabel implementation ----------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCLabel.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+void MCLabel::print(raw_ostream &OS) const {
+  OS << '"' << getInstance() << '"';
+}
+
+void MCLabel::dump() const {
+  print(dbgs());
+}
diff --git a/lib/MC/MCLoggingStreamer.cpp b/lib/MC/MCLoggingStreamer.cpp
new file mode 100644
index 0000000..b96040a
--- /dev/null
+++ b/lib/MC/MCLoggingStreamer.cpp
@@ -0,0 +1,208 @@
+//===- lib/MC/MCLoggingStreamer.cpp - API Logging Streamer ----------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+
+class MCLoggingStreamer : public MCStreamer {
+  llvm::OwningPtr<MCStreamer> Child;
+  
+  raw_ostream &OS;
+
+public:
+  MCLoggingStreamer(MCStreamer *_Child, raw_ostream &_OS)
+    : MCStreamer(_Child->getContext()), Child(_Child), OS(_OS) {}
+
+  void LogCall(const char *Function) {
+    OS << Function << "\n";
+  }
+
+  void LogCall(const char *Function, const Twine &Message) {
+    OS << Function << ": " << Message << "\n";
+  }
+
+  virtual bool isVerboseAsm() const { return Child->isVerboseAsm(); }
+  
+  virtual bool hasRawTextSupport() const { return Child->hasRawTextSupport(); }
+
+  virtual raw_ostream &GetCommentOS() { return Child->GetCommentOS(); }
+
+  virtual void AddComment(const Twine &T) {
+    LogCall("AddComment", T);
+    return Child->AddComment(T);
+  }
+
+  virtual void AddBlankLine() {
+    LogCall("AddBlankLine");
+    return Child->AddBlankLine();
+  }
+
+  virtual void SwitchSection(const MCSection *Section) {
+    CurSection = Section;
+    LogCall("SwitchSection");
+    return Child->SwitchSection(Section);
+  }
+
+  virtual void EmitLabel(MCSymbol *Symbol) {
+    LogCall("EmitLabel");
+    return Child->EmitLabel(Symbol);
+  }
+
+  virtual void EmitAssemblerFlag(MCAssemblerFlag Flag) {
+    LogCall("EmitAssemblerFlag");
+    return Child->EmitAssemblerFlag(Flag);
+  }
+
+  virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
+    LogCall("EmitAssignment");
+    return Child->EmitAssignment(Symbol, Value);
+  }
+
+  virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) {
+    LogCall("EmitSymbolAttribute");
+    return Child->EmitSymbolAttribute(Symbol, Attribute);
+  }
+
+  virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {
+    LogCall("EmitSymbolDesc");
+    return Child->EmitSymbolDesc(Symbol, DescValue);
+  }
+
+  virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol) {
+    LogCall("BeginCOFFSymbolDef");
+    return Child->BeginCOFFSymbolDef(Symbol);
+  }
+
+  virtual void EmitCOFFSymbolStorageClass(int StorageClass) {
+    LogCall("EmitCOFFSymbolStorageClass");
+    return Child->EmitCOFFSymbolStorageClass(StorageClass);
+  }
+
+  virtual void EmitCOFFSymbolType(int Type) {
+    LogCall("EmitCOFFSymbolType");
+    return Child->EmitCOFFSymbolType(Type);
+  }
+
+  virtual void EndCOFFSymbolDef() {
+    LogCall("EndCOFFSymbolDef");
+    return Child->EndCOFFSymbolDef();
+  }
+
+  virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {
+    LogCall("EmitELFSize");
+    return Child->EmitELFSize(Symbol, Value);
+  }
+
+  virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+                                unsigned ByteAlignment) {
+    LogCall("EmitCommonSymbol");
+    return Child->EmitCommonSymbol(Symbol, Size, ByteAlignment);
+  }
+
+  virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size) {
+    LogCall("EmitLocalCommonSymbol");
+    return Child->EmitLocalCommonSymbol(Symbol, Size);
+  }
+  
+  virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0,
+                            unsigned Size = 0, unsigned ByteAlignment = 0) {
+    LogCall("EmitZerofill");
+    return Child->EmitZerofill(Section, Symbol, Size, ByteAlignment);
+  }
+
+  virtual void EmitTBSSSymbol (const MCSection *Section, MCSymbol *Symbol,
+                               uint64_t Size, unsigned ByteAlignment = 0) {
+    LogCall("EmitTBSSSymbol");
+    return Child->EmitTBSSSymbol(Section, Symbol, Size, ByteAlignment);
+  }
+
+  virtual void EmitBytes(StringRef Data, unsigned AddrSpace) {
+    LogCall("EmitBytes");
+    return Child->EmitBytes(Data, AddrSpace);
+  }
+
+  virtual void EmitValue(const MCExpr *Value, unsigned Size,unsigned AddrSpace){
+    LogCall("EmitValue");
+    return Child->EmitValue(Value, Size, AddrSpace);
+  }
+
+  virtual void EmitIntValue(uint64_t Value, unsigned Size, unsigned AddrSpace) {
+    LogCall("EmitIntValue");
+    return Child->EmitIntValue(Value, Size, AddrSpace);
+  }
+
+  virtual void EmitGPRel32Value(const MCExpr *Value) {
+    LogCall("EmitGPRel32Value");
+    return Child->EmitGPRel32Value(Value);
+  }
+
+  virtual void EmitFill(uint64_t NumBytes, uint8_t FillValue,
+                        unsigned AddrSpace) {
+    LogCall("EmitFill");
+    return Child->EmitFill(NumBytes, FillValue, AddrSpace);
+  }
+
+  virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0,
+                                    unsigned ValueSize = 1,
+                                    unsigned MaxBytesToEmit = 0) {
+    LogCall("EmitValueToAlignment");
+    return Child->EmitValueToAlignment(ByteAlignment, Value,
+                                       ValueSize, MaxBytesToEmit);
+  }
+
+  virtual void EmitCodeAlignment(unsigned ByteAlignment,
+                                 unsigned MaxBytesToEmit = 0) {
+    LogCall("EmitCodeAlignment");
+    return Child->EmitCodeAlignment(ByteAlignment, MaxBytesToEmit);
+  }
+
+  virtual void EmitValueToOffset(const MCExpr *Offset,
+                                 unsigned char Value = 0) {
+    LogCall("EmitValueToOffset");
+    return Child->EmitValueToOffset(Offset, Value);
+  }
+
+  virtual void EmitFileDirective(StringRef Filename) {
+    LogCall("EmitFileDirective", "FileName:" + Filename);
+    return Child->EmitFileDirective(Filename);
+  }
+
+  virtual void EmitDwarfFileDirective(unsigned FileNo, StringRef Filename) {
+    LogCall("EmitDwarfFileDirective",
+            "FileNo:" + Twine(FileNo) + " Filename:" + Filename);
+    return Child->EmitDwarfFileDirective(FileNo, Filename);
+  }
+
+  virtual void EmitInstruction(const MCInst &Inst) {
+    LogCall("EmitInstruction");
+    return Child->EmitInstruction(Inst);
+  }
+
+  virtual void EmitRawText(StringRef String) {
+    LogCall("EmitRawText", "\"" + String + "\"");
+    return Child->EmitRawText(String);
+  }
+
+  virtual void Finish() {
+    LogCall("Finish");
+    return Child->Finish();
+  }
+
+};
+
+} // end anonymous namespace.
+
+MCStreamer *llvm::createLoggingStreamer(MCStreamer *Child, raw_ostream &OS) {
+  return new MCLoggingStreamer(Child, OS);
+}
diff --git a/lib/MC/MCMachOStreamer.cpp b/lib/MC/MCMachOStreamer.cpp
index 120f837..27e4e98 100644
--- a/lib/MC/MCMachOStreamer.cpp
+++ b/lib/MC/MCMachOStreamer.cpp
@@ -16,6 +16,7 @@
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCSection.h"
 #include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCMachOSymbolFlags.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetAsmBackend.h"
@@ -25,30 +26,14 @@ using namespace llvm;
 namespace {
 
 class MCMachOStreamer : public MCStreamer {
-  /// SymbolFlags - We store the value for the 'desc' symbol field in the lowest
-  /// 16 bits of the implementation defined flags.
-  enum SymbolFlags { // See <mach-o/nlist.h>.
-    SF_DescFlagsMask                        = 0xFFFF,
-
-    // Reference type flags.
-    SF_ReferenceTypeMask                    = 0x0007,
-    SF_ReferenceTypeUndefinedNonLazy        = 0x0000,
-    SF_ReferenceTypeUndefinedLazy           = 0x0001,
-    SF_ReferenceTypeDefined                 = 0x0002,
-    SF_ReferenceTypePrivateDefined          = 0x0003,
-    SF_ReferenceTypePrivateUndefinedNonLazy = 0x0004,
-    SF_ReferenceTypePrivateUndefinedLazy    = 0x0005,
-
-    // Other 'desc' flags.
-    SF_NoDeadStrip                          = 0x0020,
-    SF_WeakReference                        = 0x0040,
-    SF_WeakDefinition                       = 0x0080
-  };
 
 private:
   MCAssembler Assembler;
   MCSectionData *CurSectionData;
 
+  /// Track the current atom for each section.
+  DenseMap<const MCSectionData*, MCSymbolData*> CurrentAtomMap;
+
 private:
   MCFragment *getCurrentFragment() const {
     assert(CurSectionData && "No current section!");
@@ -64,10 +49,20 @@ private:
   MCDataFragment *getOrCreateDataFragment() const {
     MCDataFragment *F = dyn_cast_or_null<MCDataFragment>(getCurrentFragment());
     if (!F)
-      F = new MCDataFragment(CurSectionData);
+      F = createDataFragment();
     return F;
   }
 
+  /// Create a new data fragment in the current section.
+  MCDataFragment *createDataFragment() const {
+    MCDataFragment *DF = new MCDataFragment(CurSectionData);
+    DF->setAtom(CurrentAtomMap.lookup(CurSectionData));
+    return DF;
+  }
+
+  void EmitInstToFragment(const MCInst &Inst);
+  void EmitInstToData(const MCInst &Inst);
+
 public:
   MCMachOStreamer(MCContext &Context, TargetAsmBackend &TAB,
                   raw_ostream &_OS, MCCodeEmitter *_Emitter)
@@ -114,6 +109,18 @@ public:
   virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue);
   virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
                                 unsigned ByteAlignment);
+  virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol) {
+    assert(0 && "macho doesn't support this directive");
+  }
+  virtual void EmitCOFFSymbolStorageClass(int StorageClass) {
+    assert(0 && "macho doesn't support this directive");
+  }
+  virtual void EmitCOFFSymbolType(int Type) {
+    assert(0 && "macho doesn't support this directive");
+  }
+  virtual void EndCOFFSymbolDef() {
+    assert(0 && "macho doesn't support this directive");
+  }
   virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {
     assert(0 && "macho doesn't support this directive");
   }
@@ -122,6 +129,8 @@ public:
   }
   virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0,
                             unsigned Size = 0, unsigned ByteAlignment = 0);
+  virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
+                              uint64_t Size, unsigned ByteAlignment = 0);
   virtual void EmitBytes(StringRef Data, unsigned AddrSpace);
   virtual void EmitValue(const MCExpr *Value, unsigned Size,unsigned AddrSpace);
   virtual void EmitGPRel32Value(const MCExpr *Value) {
@@ -134,14 +143,14 @@ public:
                                  unsigned MaxBytesToEmit = 0);
   virtual void EmitValueToOffset(const MCExpr *Offset,
                                  unsigned char Value = 0);
-  
+
   virtual void EmitFileDirective(StringRef Filename) {
-    errs() << "FIXME: MCMachoStreamer:EmitFileDirective not implemented\n";
+    report_fatal_error("unsupported directive: '.file'");
   }
   virtual void EmitDwarfFileDirective(unsigned FileNo, StringRef Filename) {
-    errs() << "FIXME: MCMachoStreamer:EmitDwarfFileDirective not implemented\n";
+    report_fatal_error("unsupported directive: '.file'");
   }
-  
+
   virtual void EmitInstruction(const MCInst &Inst);
   virtual void Finish();
 
@@ -152,7 +161,7 @@ public:
 
 void MCMachOStreamer::SwitchSection(const MCSection *Section) {
   assert(Section && "Cannot switch to a null section!");
-  
+
   // If already in this section, then this is a noop.
   if (Section == CurSection) return;
 
@@ -162,20 +171,39 @@ void MCMachOStreamer::SwitchSection(const MCSection *Section) {
 
 void MCMachOStreamer::EmitLabel(MCSymbol *Symbol) {
   assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
+  assert(!Symbol->isVariable() && "Cannot emit a variable symbol!");
+  assert(CurSection && "Cannot emit before setting section!");
+
+  MCSymbolData &SD = Assembler.getOrCreateSymbolData(*Symbol);
+
+  // Update the current atom map, if necessary.
+  bool MustCreateFragment = false;
+  if (Assembler.isSymbolLinkerVisible(&SD)) {
+    CurrentAtomMap[CurSectionData] = &SD;
+
+    // We have to create a new fragment, fragments cannot span atoms.
+    MustCreateFragment = true;
+  }
 
   // FIXME: This is wasteful, we don't necessarily need to create a data
   // fragment. Instead, we should mark the symbol as pointing into the data
   // fragment if it exists, otherwise we should just queue the label and set its
   // fragment pointer when we emit the next fragment.
-  MCDataFragment *F = getOrCreateDataFragment();
-  MCSymbolData &SD = Assembler.getOrCreateSymbolData(*Symbol);
+  MCDataFragment *F =
+    MustCreateFragment ? createDataFragment() : getOrCreateDataFragment();
   assert(!SD.getFragment() && "Unexpected fragment on symbol data!");
   SD.setFragment(F);
   SD.setOffset(F->getContents().size());
 
-  // This causes the reference type and weak reference flags to be cleared.
-  SD.setFlags(SD.getFlags() & ~(SF_WeakReference | SF_ReferenceTypeMask));
-  
+  // This causes the reference type flag to be cleared. Darwin 'as' was "trying"
+  // to clear the weak reference and weak definition bits too, but the
+  // implementation was buggy. For now we just try to match 'as', for
+  // diffability.
+  //
+  // FIXME: Cleanup this code, these bits should be emitted based on semantic
+  // properties, not on the order of definition, etc.
+  SD.setFlags(SD.getFlags() & ~SF_ReferenceTypeMask);
+
   Symbol->setSection(*CurSection);
 }
 
@@ -190,13 +218,9 @@ void MCMachOStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
 }
 
 void MCMachOStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
-  // Only absolute symbols can be redefined.
-  assert((Symbol->isUndefined() || Symbol->isAbsolute()) &&
-         "Cannot define a symbol twice!");
-
   // FIXME: Lift context changes into super class.
-  // FIXME: Set associated section.
-  Symbol->setValue(AddValueSymbols(Value));
+  Assembler.getOrCreateSymbolData(*Symbol);
+  Symbol->setVariableValue(AddValueSymbols(Value));
 }
 
 void MCMachOStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
@@ -243,6 +267,13 @@ void MCMachOStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
 
   case MCSA_Global:
     SD.setExternal(true);
+    // This effectively clears the undefined lazy bit, in Darwin 'as', although
+    // it isn't very consistent because it implements this as part of symbol
+    // lookup.
+    //
+    // FIXME: Cleanup this code, these bits should be emitted based on semantic
+    // properties, not on the order of definition, etc.
+    SD.setFlags(SD.getFlags() & ~SF_ReferenceTypeUndefinedLazy);
     break;
 
   case MCSA_LazyReference:
@@ -280,7 +311,7 @@ void MCMachOStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
 
 void MCMachOStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {
   // Encode the 'desc' value into the lowest implementation defined bits.
-  assert(DescValue == (DescValue & SF_DescFlagsMask) && 
+  assert(DescValue == (DescValue & SF_DescFlagsMask) &&
          "Invalid .desc value!");
   Assembler.getOrCreateSymbolData(*Symbol).setFlags(DescValue&SF_DescFlagsMask);
 }
@@ -309,8 +340,14 @@ void MCMachOStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol,
 
   MCSymbolData &SD = Assembler.getOrCreateSymbolData(*Symbol);
 
-  MCFragment *F = new MCZeroFillFragment(Size, ByteAlignment, &SectData);
+  // Emit an align fragment if necessary.
+  if (ByteAlignment != 1)
+    new MCAlignFragment(ByteAlignment, 0, 0, ByteAlignment, &SectData);
+
+  MCFragment *F = new MCFillFragment(0, 0, Size, &SectData);
   SD.setFragment(F);
+  if (Assembler.isSymbolLinkerVisible(&SD))
+    F->setAtom(&SD);
 
   Symbol->setSection(*Section);
 
@@ -319,6 +356,14 @@ void MCMachOStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol,
     SectData.setAlignment(ByteAlignment);
 }
 
+// This should always be called with the thread local bss section.  Like the
+// .zerofill directive this doesn't actually switch sections on us.
+void MCMachOStreamer::EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
+                                     uint64_t Size, unsigned ByteAlignment) {
+  EmitZerofill(Section, Symbol, Size, ByteAlignment);
+  return;
+}
+
 void MCMachOStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) {
   getOrCreateDataFragment()->getContents().append(Data.begin(), Data.end());
 }
@@ -334,8 +379,9 @@ void MCMachOStreamer::EmitValue(const MCExpr *Value, unsigned Size,
     for (unsigned i = 0; i != Size; ++i)
       DF->getContents().push_back(uint8_t(AbsValue >> (i * 8)));
   } else {
-    DF->addFixup(MCAsmFixup(DF->getContents().size(), *AddValueSymbols(Value),
-                            MCFixup::getKindForSize(Size)));
+    DF->addFixup(MCFixup::Create(DF->getContents().size(),
+                                 AddValueSymbols(Value),
+                                 MCFixup::getKindForSize(Size)));
     DF->getContents().resize(DF->getContents().size() + Size, 0);
   }
 }
@@ -345,8 +391,9 @@ void MCMachOStreamer::EmitValueToAlignment(unsigned ByteAlignment,
                                            unsigned MaxBytesToEmit) {
   if (MaxBytesToEmit == 0)
     MaxBytesToEmit = ByteAlignment;
-  new MCAlignFragment(ByteAlignment, Value, ValueSize, MaxBytesToEmit,
-                      false /* EmitNops */, CurSectionData);
+  MCFragment *F = new MCAlignFragment(ByteAlignment, Value, ValueSize,
+                                      MaxBytesToEmit, CurSectionData);
+  F->setAtom(CurrentAtomMap.lookup(CurSectionData));
 
   // Update the maximum alignment on the current section if necessary.
   if (ByteAlignment > CurSectionData->getAlignment())
@@ -357,8 +404,10 @@ void MCMachOStreamer::EmitCodeAlignment(unsigned ByteAlignment,
                                         unsigned MaxBytesToEmit) {
   if (MaxBytesToEmit == 0)
     MaxBytesToEmit = ByteAlignment;
-  new MCAlignFragment(ByteAlignment, 0, 1, MaxBytesToEmit,
-                      true /* EmitNops */, CurSectionData);
+  MCAlignFragment *F = new MCAlignFragment(ByteAlignment, 0, 1, MaxBytesToEmit,
+                                           CurSectionData);
+  F->setEmitNops(true);
+  F->setAtom(CurrentAtomMap.lookup(CurSectionData));
 
   // Update the maximum alignment on the current section if necessary.
   if (ByteAlignment > CurSectionData->getAlignment())
@@ -367,19 +416,30 @@ void MCMachOStreamer::EmitCodeAlignment(unsigned ByteAlignment,
 
 void MCMachOStreamer::EmitValueToOffset(const MCExpr *Offset,
                                         unsigned char Value) {
-  new MCOrgFragment(*Offset, Value, CurSectionData);
+  MCFragment *F = new MCOrgFragment(*Offset, Value, CurSectionData);
+  F->setAtom(CurrentAtomMap.lookup(CurSectionData));
 }
 
-void MCMachOStreamer::EmitInstruction(const MCInst &Inst) {
-  // Scan for values.
-  for (unsigned i = 0; i != Inst.getNumOperands(); ++i)
-    if (Inst.getOperand(i).isExpr())
-      AddValueSymbols(Inst.getOperand(i).getExpr());
+void MCMachOStreamer::EmitInstToFragment(const MCInst &Inst) {
+  MCInstFragment *IF = new MCInstFragment(Inst, CurSectionData);
+  IF->setAtom(CurrentAtomMap.lookup(CurSectionData));
 
-  CurSectionData->setHasInstructions(true);
+  // Add the fixups and data.
+  //
+  // FIXME: Revisit this design decision when relaxation is done, we may be
+  // able to get away with not storing any extra data in the MCInst.
+  SmallVector<MCFixup, 4> Fixups;
+  SmallString<256> Code;
+  raw_svector_ostream VecOS(Code);
+  Assembler.getEmitter().EncodeInstruction(Inst, VecOS, Fixups);
+  VecOS.flush();
 
-  // FIXME-PERF: Common case is that we don't need to relax, encode directly
-  // onto the data fragments buffers.
+  IF->getCode() = Code;
+  IF->getFixups() = Fixups;
+}
+
+void MCMachOStreamer::EmitInstToData(const MCInst &Inst) {
+  MCDataFragment *DF = getOrCreateDataFragment();
 
   SmallVector<MCFixup, 4> Fixups;
   SmallString<256> Code;
@@ -387,47 +447,41 @@ void MCMachOStreamer::EmitInstruction(const MCInst &Inst) {
   Assembler.getEmitter().EncodeInstruction(Inst, VecOS, Fixups);
   VecOS.flush();
 
-  // FIXME: Eliminate this copy.
-  SmallVector<MCAsmFixup, 4> AsmFixups;
+  // Add the fixups and data.
   for (unsigned i = 0, e = Fixups.size(); i != e; ++i) {
-    MCFixup &F = Fixups[i];
-    AsmFixups.push_back(MCAsmFixup(F.getOffset(), *F.getValue(),
-                                   F.getKind()));
+    Fixups[i].setOffset(Fixups[i].getOffset() + DF->getContents().size());
+    DF->addFixup(Fixups[i]);
   }
+  DF->getContents().append(Code.begin(), Code.end());
+}
 
-  // See if we might need to relax this instruction, if so it needs its own
-  // fragment.
-  //
-  // FIXME-PERF: Support target hook to do a fast path that avoids the encoder,
-  // when we can immediately tell that we will get something which might need
-  // relaxation (and compute its size).
-  //
-  // FIXME-PERF: We should also be smart about immediately relaxing instructions
-  // which we can already show will never possibly fit (we can also do a very
-  // good job of this before we do the first relaxation pass, because we have
-  // total knowledge about undefined symbols at that point). Even now, though,
-  // we can do a decent job, especially on Darwin where scattering means that we
-  // are going to often know that we can never fully resolve a fixup.
-  if (Assembler.getBackend().MayNeedRelaxation(Inst, AsmFixups)) {
-    MCInstFragment *IF = new MCInstFragment(Inst, CurSectionData);
-
-    // Add the fixups and data.
-    //
-    // FIXME: Revisit this design decision when relaxation is done, we may be
-    // able to get away with not storing any extra data in the MCInst.
-    IF->getCode() = Code;
-    IF->getFixups() = AsmFixups;
+void MCMachOStreamer::EmitInstruction(const MCInst &Inst) {
+  // Scan for values.
+  for (unsigned i = Inst.getNumOperands(); i--; )
+    if (Inst.getOperand(i).isExpr())
+      AddValueSymbols(Inst.getOperand(i).getExpr());
 
+  CurSectionData->setHasInstructions(true);
+
+  // If this instruction doesn't need relaxation, just emit it as data.
+  if (!Assembler.getBackend().MayNeedRelaxation(Inst)) {
+    EmitInstToData(Inst);
     return;
   }
 
-  // Add the fixups and data.
-  MCDataFragment *DF = getOrCreateDataFragment();
-  for (unsigned i = 0, e = AsmFixups.size(); i != e; ++i) {
-    AsmFixups[i].Offset += DF->getContents().size();
-    DF->addFixup(AsmFixups[i]);
+  // Otherwise, if we are relaxing everything, relax the instruction as much as
+  // possible and emit it as data.
+  if (Assembler.getRelaxAll()) {
+    MCInst Relaxed;
+    Assembler.getBackend().RelaxInstruction(Inst, Relaxed);
+    while (Assembler.getBackend().MayNeedRelaxation(Relaxed))
+      Assembler.getBackend().RelaxInstruction(Relaxed, Relaxed);
+    EmitInstToData(Relaxed);
+    return;
   }
-  DF->getContents().append(Code.begin(), Code.end());
+
+  // Otherwise emit to a separate fragment.
+  EmitInstToFragment(Inst);
 }
 
 void MCMachOStreamer::Finish() {
diff --git a/lib/MC/MCNullStreamer.cpp b/lib/MC/MCNullStreamer.cpp
index 5f0c64a..5332ade 100644
--- a/lib/MC/MCNullStreamer.cpp
+++ b/lib/MC/MCNullStreamer.cpp
@@ -42,6 +42,12 @@ namespace {
     virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute){}
 
     virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {}
+
+    virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol) {}
+    virtual void EmitCOFFSymbolStorageClass(int StorageClass) {}
+    virtual void EmitCOFFSymbolType(int Type) {}
+    virtual void EndCOFFSymbolDef() {}
+
     virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {}
     virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
                                   unsigned ByteAlignment) {}
@@ -49,7 +55,8 @@ namespace {
 
     virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0,
                               unsigned Size = 0, unsigned ByteAlignment = 0) {}
-
+    virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
+                                uint64_t Size, unsigned ByteAlignment) {}
     virtual void EmitBytes(StringRef Data, unsigned AddrSpace) {}
 
     virtual void EmitValue(const MCExpr *Value, unsigned Size,
diff --git a/lib/MC/MCParser/AsmLexer.cpp b/lib/MC/MCParser/AsmLexer.cpp
index 1183312..1cbe09a 100644
--- a/lib/MC/MCParser/AsmLexer.cpp
+++ b/lib/MC/MCParser/AsmLexer.cpp
@@ -132,11 +132,6 @@ AsmToken AsmLexer::LexLineComment() {
 ///   Decimal integer: [1-9][0-9]*
 /// TODO: FP literal.
 AsmToken AsmLexer::LexDigit() {
-  if (*CurPtr == ':')
-    return ReturnError(TokStart, "FIXME: local label not implemented");
-  if (*CurPtr == 'f' || *CurPtr == 'b')
-    return ReturnError(TokStart, "FIXME: directional label not implemented");
-  
   // Decimal integer: [1-9][0-9]*
   if (CurPtr[-1] != '0') {
     while (isdigit(*CurPtr))
@@ -158,6 +153,12 @@ AsmToken AsmLexer::LexDigit() {
   
   if (*CurPtr == 'b') {
     ++CurPtr;
+    // See if we actually have "0b" as part of something like "jmp 0b\n"
+    if (!isdigit(CurPtr[0])) {
+      --CurPtr;
+      StringRef Result(TokStart, CurPtr - TokStart);
+      return AsmToken(AsmToken::Integer, Result, 0);
+    }
     const char *NumStart = CurPtr;
     while (CurPtr[0] == '0' || CurPtr[0] == '1')
       ++CurPtr;
@@ -280,6 +281,7 @@ AsmToken AsmLexer::LexToken() {
   case '*': return AsmToken(AsmToken::Star, StringRef(TokStart, 1));
   case ',': return AsmToken(AsmToken::Comma, StringRef(TokStart, 1));
   case '$': return AsmToken(AsmToken::Dollar, StringRef(TokStart, 1));
+  case '@': return AsmToken(AsmToken::At, StringRef(TokStart, 1));
   case '=': 
     if (*CurPtr == '=')
       return ++CurPtr, AsmToken(AsmToken::EqualEqual, StringRef(TokStart, 2));
diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp
index a63d2e4..4523eab 100644
--- a/lib/MC/MCParser/AsmParser.cpp
+++ b/lib/MC/MCParser/AsmParser.cpp
@@ -13,6 +13,7 @@
 
 #include "llvm/MC/MCParser/AsmParser.h"
 #include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringSwitch.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCExpr.h"
@@ -189,6 +190,9 @@ bool AsmParser::ParsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
     std::pair<StringRef, StringRef> Split = getTok().getIdentifier().split('@');
     MCSymbol *Sym = CreateSymbol(Split.first);
 
+    // Mark the symbol as used in an expression.
+    Sym->setUsedInExpr(true);
+
     // Lookup the symbol variant if used.
     MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
     if (Split.first.size() != getTok().getIdentifier().size())
@@ -199,11 +203,11 @@ bool AsmParser::ParsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
 
     // If this is an absolute variable reference, substitute it now to preserve
     // semantics in the face of reassignment.
-    if (Sym->getValue() && isa<MCConstantExpr>(Sym->getValue())) {
+    if (Sym->isVariable() && isa<MCConstantExpr>(Sym->getVariableValue())) {
       if (Variant)
         return Error(EndLoc, "unexpected modified on variable reference");
 
-      Res = Sym->getValue();
+      Res = Sym->getVariableValue();
       return false;
     }
 
@@ -211,11 +215,28 @@ bool AsmParser::ParsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
     Res = MCSymbolRefExpr::Create(Sym, Variant, getContext());
     return false;
   }
-  case AsmToken::Integer:
-    Res = MCConstantExpr::Create(getTok().getIntVal(), getContext());
+  case AsmToken::Integer: {
+    SMLoc Loc = getTok().getLoc();
+    int64_t IntVal = getTok().getIntVal();
+    Res = MCConstantExpr::Create(IntVal, getContext());
     EndLoc = Lexer.getLoc();
     Lex(); // Eat token.
+    // Look for 'b' or 'f' following an Integer as a directional label
+    if (Lexer.getKind() == AsmToken::Identifier) {
+      StringRef IDVal = getTok().getString();
+      if (IDVal == "f" || IDVal == "b"){
+        MCSymbol *Sym = Ctx.GetDirectionalLocalSymbol(IntVal,
+                                                      IDVal == "f" ? 1 : 0);
+        Res = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None,
+                                      getContext());
+        if(IDVal == "b" && Sym->isUndefined())
+          return Error(Loc, "invalid reference to undefined symbol");
+        EndLoc = Lexer.getLoc();
+        Lex(); // Eat identifier.
+      }
+    }
     return false;
+  }
   case AsmToken::Dot: {
     // This is a '.' reference, which references the current PC.  Emit a
     // temporary label to the streamer and refer to it.
@@ -411,6 +432,7 @@ bool AsmParser::ParseBinOpRHS(unsigned Precedence, const MCExpr *&Res,
 ///   ::= Label* Identifier OperandList* EndOfStatement
 bool AsmParser::ParseStatement() {
   if (Lexer.is(AsmToken::EndOfStatement)) {
+    Out.AddBlankLine();
     Lex();
     return false;
   }
@@ -419,7 +441,25 @@ bool AsmParser::ParseStatement() {
   AsmToken ID = getTok();
   SMLoc IDLoc = ID.getLoc();
   StringRef IDVal;
-  if (ParseIdentifier(IDVal)) {
+  int64_t LocalLabelVal = -1;
+  // GUESS allow an integer followed by a ':' as a directional local label
+  if (Lexer.is(AsmToken::Integer)) {
+    LocalLabelVal = getTok().getIntVal();
+    if (LocalLabelVal < 0) {
+      if (!TheCondState.Ignore)
+        return TokError("unexpected token at start of statement");
+      IDVal = "";
+    }
+    else {
+      IDVal = getTok().getString();
+      Lex(); // Consume the integer token to be used as an identifier token.
+      if (Lexer.getKind() != AsmToken::Colon) {
+	  if (!TheCondState.Ignore)
+	    return TokError("unexpected token at start of statement");
+      }
+    }
+  }
+  else if (ParseIdentifier(IDVal)) {
     if (!TheCondState.Ignore)
       return TokError("unexpected token at start of statement");
     IDVal = "";
@@ -456,13 +496,25 @@ bool AsmParser::ParseStatement() {
     // FIXME: Diagnostics. Note the location of the definition as a label.
     // FIXME: This doesn't diagnose assignment to a symbol which has been
     // implicitly marked as external.
-    MCSymbol *Sym = CreateSymbol(IDVal);
-    if (!Sym->isUndefined())
+    MCSymbol *Sym;
+    if (LocalLabelVal == -1)
+      Sym = CreateSymbol(IDVal);
+    else
+      Sym = Ctx.CreateDirectionalLocalSymbol(LocalLabelVal);
+    if (!Sym->isUndefined() || Sym->isVariable())
       return Error(IDLoc, "invalid symbol redefinition");
     
     // Emit the label.
     Out.EmitLabel(Sym);
    
+    // Consume any end of statement token, if present, to avoid spurious
+    // AddBlankLine calls().
+    if (Lexer.is(AsmToken::EndOfStatement)) {
+      Lex();
+      if (Lexer.is(AsmToken::Eof))
+        return false;
+    }
+
     return ParseStatement();
   }
 
@@ -620,6 +672,16 @@ bool AsmParser::ParseStatement() {
       return ParseDirectiveSectionSwitch("__OBJC", "__selector_strs",
                                          MCSectionMachO::S_CSTRING_LITERALS);
     
+    if (IDVal == ".tdata")
+      return ParseDirectiveSectionSwitch("__DATA", "__thread_data",
+                                        MCSectionMachO::S_THREAD_LOCAL_REGULAR);
+    if (IDVal == ".tlv")
+      return ParseDirectiveSectionSwitch("__DATA", "__thread_vars",
+                                      MCSectionMachO::S_THREAD_LOCAL_VARIABLES);
+    if (IDVal == ".thread_init_func")
+      return ParseDirectiveSectionSwitch("__DATA", "__thread_init",
+                        MCSectionMachO::S_THREAD_LOCAL_INIT_FUNCTION_POINTERS);
+    
     // Assembler features
     if (IDVal == ".set")
       return ParseDirectiveSet();
@@ -686,6 +748,8 @@ bool AsmParser::ParseStatement() {
       return ParseDirectiveSymbolAttribute(MCSA_Protected);
     if (IDVal == ".reference")
       return ParseDirectiveSymbolAttribute(MCSA_Reference);
+    if (IDVal == ".type")
+      return ParseDirectiveELFType();
     if (IDVal == ".weak")
       return ParseDirectiveSymbolAttribute(MCSA_Weak);
     if (IDVal == ".weak_definition")
@@ -703,6 +767,8 @@ bool AsmParser::ParseStatement() {
       return ParseDirectiveDarwinSymbolDesc();
     if (IDVal == ".lsym")
       return ParseDirectiveDarwinLsym();
+    if (IDVal == ".tbss")
+      return ParseDirectiveDarwinTBSS();
 
     if (IDVal == ".subsections_via_symbols")
       return ParseDirectiveDarwinSubsectionsViaSymbols();
@@ -729,8 +795,13 @@ bool AsmParser::ParseStatement() {
     return false;
   }
 
+  // Canonicalize the opcode to lower case.
+  SmallString<128> Opcode;
+  for (unsigned i = 0, e = IDVal.size(); i != e; ++i)
+    Opcode.push_back(tolower(IDVal[i]));
+  
   SmallVector<MCParsedAsmOperand*, 8> ParsedOperands;
-  bool HadError = getTargetParser().ParseInstruction(IDVal, IDLoc,
+  bool HadError = getTargetParser().ParseInstruction(Opcode.str(), IDLoc,
                                                      ParsedOperands);
   if (!HadError && Lexer.isNot(AsmToken::EndOfStatement))
     HadError = TokError("unexpected token in argument list");
@@ -786,11 +857,13 @@ bool AsmParser::ParseAssignment(const StringRef &Name) {
     //
     // FIXME: Diagnostics. Note the location of the definition as a label.
     // FIXME: Diagnose assignment to protected identifier (e.g., register name).
-    if (!Sym->isUndefined() && !Sym->isAbsolute())
+    if (Sym->isUndefined() && !Sym->isUsedInExpr())
+      ; // Allow redefinitions of undefined symbols only used in directives.
+    else if (!Sym->isUndefined() && !Sym->isAbsolute())
       return Error(EqualLoc, "redefinition of '" + Name + "'");
     else if (!Sym->isVariable())
       return Error(EqualLoc, "invalid assignment to '" + Name + "'");
-    else if (!isa<MCConstantExpr>(Sym->getValue()))
+    else if (!isa<MCConstantExpr>(Sym->getVariableValue()))
       return Error(EqualLoc, "invalid reassignment of non-absolute variable '" +
                    Name + "'");
   } else
@@ -798,6 +871,8 @@ bool AsmParser::ParseAssignment(const StringRef &Name) {
 
   // FIXME: Handle '.'.
 
+  Sym->setUsedInExpr(true);
+
   // Do the assignment.
   Out.EmitAssignment(Sym, Value);
 
@@ -1008,7 +1083,11 @@ bool AsmParser::ParseDirectiveValue(unsigned Size) {
       if (ParseExpression(Value))
         return true;
 
-      Out.EmitValue(Value, Size, DEFAULT_ADDRSPACE);
+      // Special case constant expressions to match code generator.
+      if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value))
+        Out.EmitIntValue(MCE->getValue(), Size, DEFAULT_ADDRSPACE);
+      else
+        Out.EmitValue(Value, Size, DEFAULT_ADDRSPACE);
 
       if (Lexer.is(AsmToken::EndOfStatement))
         break;
@@ -1090,8 +1169,7 @@ bool AsmParser::ParseDirectiveFill() {
     return TokError("invalid '.fill' size, expected 1, 2, 4, or 8");
 
   for (uint64_t i = 0, e = NumValues; i != e; ++i)
-    Out.EmitValue(MCConstantExpr::Create(FillExpr, getContext()), FillSize,
-                  DEFAULT_ADDRSPACE);
+    Out.EmitIntValue(FillExpr, FillSize, DEFAULT_ADDRSPACE);
 
   return false;
 }
@@ -1169,10 +1247,8 @@ bool AsmParser::ParseDirectiveAlign(bool IsPow2, unsigned ValueSize) {
 
   Lex();
 
-  if (!HasFillExpr) {
-    // FIXME: Sometimes fill with nop.
+  if (!HasFillExpr)
     FillExpr = 0;
-  }
 
   // Compute alignment in bytes.
   if (IsPow2) {
@@ -1200,14 +1276,21 @@ bool AsmParser::ParseDirectiveAlign(bool IsPow2, unsigned ValueSize) {
     }
   }
 
-  // FIXME: hard code the parser to use EmitCodeAlignment for text when using
-  // the TextAlignFillValue.
-  if(Out.getCurrentSection()->getKind().isText() && 
-     Lexer.getMAI().getTextAlignFillValue() == FillExpr)
+  // Check whether we should use optimal code alignment for this .align
+  // directive.
+  //
+  // FIXME: This should be using a target hook.
+  bool UseCodeAlign = false;
+  if (const MCSectionMachO *S = dyn_cast<MCSectionMachO>(
+        Out.getCurrentSection()))
+      UseCodeAlign = S->hasAttribute(MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS);
+  if ((!HasFillExpr || Lexer.getMAI().getTextAlignFillValue() == FillExpr) &&
+      ValueSize == 1 && UseCodeAlign) {
     Out.EmitCodeAlignment(Alignment, MaxBytesToFill);
-  else
+  } else {
     // FIXME: Target specific behavior about how the "extra" bytes are filled.
     Out.EmitValueToAlignment(Alignment, FillExpr, ValueSize, MaxBytesToFill);
+  }
 
   return false;
 }
@@ -1239,6 +1322,52 @@ bool AsmParser::ParseDirectiveSymbolAttribute(MCSymbolAttr Attr) {
   return false;  
 }
 
+/// ParseDirectiveELFType
+///  ::= .type identifier , @attribute
+bool AsmParser::ParseDirectiveELFType() {
+  StringRef Name;
+  if (ParseIdentifier(Name))
+    return TokError("expected identifier in directive");
+
+  // Handle the identifier as the key symbol.
+  MCSymbol *Sym = CreateSymbol(Name);
+
+  if (Lexer.isNot(AsmToken::Comma))
+    return TokError("unexpected token in '.type' directive");
+  Lex();
+
+  if (Lexer.isNot(AsmToken::At))
+    return TokError("expected '@' before type");
+  Lex();
+
+  StringRef Type;
+  SMLoc TypeLoc;
+
+  TypeLoc = Lexer.getLoc();
+  if (ParseIdentifier(Type))
+    return TokError("expected symbol type in directive");
+
+  MCSymbolAttr Attr = StringSwitch<MCSymbolAttr>(Type)
+    .Case("function", MCSA_ELF_TypeFunction)
+    .Case("object", MCSA_ELF_TypeObject)
+    .Case("tls_object", MCSA_ELF_TypeTLS)
+    .Case("common", MCSA_ELF_TypeCommon)
+    .Case("notype", MCSA_ELF_TypeNoType)
+    .Default(MCSA_Invalid);
+
+  if (Attr == MCSA_Invalid)
+    return Error(TypeLoc, "unsupported attribute in '.type' directive");
+
+  if (Lexer.isNot(AsmToken::EndOfStatement))
+    return TokError("unexpected token in '.type' directive");
+
+  Lex();
+
+  Out.EmitSymbolAttribute(Sym, Attr);
+
+  return false;
+}
+
 /// ParseDirectiveDarwinSymbolDesc
 ///  ::= .desc identifier , expression
 bool AsmParser::ParseDirectiveDarwinSymbolDesc() {
@@ -1316,7 +1445,7 @@ bool AsmParser::ParseDirectiveComm(bool IsLocal) {
     return Error(SizeLoc, "invalid '.comm' or '.lcomm' directive size, can't "
                  "be less than zero");
 
-  // NOTE: The alignment in the directive is a power of 2 value, the assember
+  // NOTE: The alignment in the directive is a power of 2 value, the assembler
   // may internally end up wanting an alignment in bytes.
   // FIXME: Diagnose overflow.
   if (Pow2Alignment < 0)
@@ -1344,22 +1473,18 @@ bool AsmParser::ParseDirectiveComm(bool IsLocal) {
 ///  ::= .zerofill segname , sectname [, identifier , size_expression [
 ///      , align_expression ]]
 bool AsmParser::ParseDirectiveDarwinZerofill() {
-  // FIXME: Handle quoted names here.
-
-  if (Lexer.isNot(AsmToken::Identifier))
+  StringRef Segment;
+  if (ParseIdentifier(Segment))
     return TokError("expected segment name after '.zerofill' directive");
-  StringRef Segment = getTok().getString();
-  Lex();
 
   if (Lexer.isNot(AsmToken::Comma))
     return TokError("unexpected token in directive");
   Lex();
- 
-  if (Lexer.isNot(AsmToken::Identifier))
+
+  StringRef Section;
+  if (ParseIdentifier(Section))
     return TokError("expected section name after comma in '.zerofill' "
                     "directive");
-  StringRef Section = getTok().getString();
-  Lex();
 
   // If this is the end of the line all that was wanted was to create the
   // the section but with no symbol.
@@ -1375,13 +1500,13 @@ bool AsmParser::ParseDirectiveDarwinZerofill() {
     return TokError("unexpected token in directive");
   Lex();
 
-  if (Lexer.isNot(AsmToken::Identifier))
+  SMLoc IDLoc = Lexer.getLoc();
+  StringRef IDStr;
+  if (ParseIdentifier(IDStr))
     return TokError("expected identifier in directive");
   
   // handle the identifier as the key symbol.
-  SMLoc IDLoc = Lexer.getLoc();
-  MCSymbol *Sym = CreateSymbol(getTok().getString());
-  Lex();
+  MCSymbol *Sym = CreateSymbol(IDStr);
 
   if (Lexer.isNot(AsmToken::Comma))
     return TokError("unexpected token in directive");
@@ -1410,7 +1535,7 @@ bool AsmParser::ParseDirectiveDarwinZerofill() {
     return Error(SizeLoc, "invalid '.zerofill' directive size, can't be less "
                  "than zero");
 
-  // NOTE: The alignment in the directive is a power of 2 value, the assember
+  // NOTE: The alignment in the directive is a power of 2 value, the assembler
   // may internally end up wanting an alignment in bytes.
   // FIXME: Diagnose overflow.
   if (Pow2Alignment < 0)
@@ -1431,6 +1556,60 @@ bool AsmParser::ParseDirectiveDarwinZerofill() {
   return false;
 }
 
+/// ParseDirectiveDarwinTBSS
+///  ::= .tbss identifier, size, align
+bool AsmParser::ParseDirectiveDarwinTBSS() {
+  SMLoc IDLoc = Lexer.getLoc();
+  StringRef Name;
+  if (ParseIdentifier(Name))
+    return TokError("expected identifier in directive");
+    
+  // Handle the identifier as the key symbol.
+  MCSymbol *Sym = CreateSymbol(Name);
+
+  if (Lexer.isNot(AsmToken::Comma))
+    return TokError("unexpected token in directive");
+  Lex();
+
+  int64_t Size;
+  SMLoc SizeLoc = Lexer.getLoc();
+  if (ParseAbsoluteExpression(Size))
+    return true;
+
+  int64_t Pow2Alignment = 0;
+  SMLoc Pow2AlignmentLoc;
+  if (Lexer.is(AsmToken::Comma)) {
+    Lex();
+    Pow2AlignmentLoc = Lexer.getLoc();
+    if (ParseAbsoluteExpression(Pow2Alignment))
+      return true;
+  }
+  
+  if (Lexer.isNot(AsmToken::EndOfStatement))
+    return TokError("unexpected token in '.tbss' directive");
+  
+  Lex();
+
+  if (Size < 0)
+    return Error(SizeLoc, "invalid '.tbss' directive size, can't be less than"
+                 "zero");
+
+  // FIXME: Diagnose overflow.
+  if (Pow2Alignment < 0)
+    return Error(Pow2AlignmentLoc, "invalid '.tbss' alignment, can't be less"
+                 "than zero");
+
+  if (!Sym->isUndefined())
+    return Error(IDLoc, "invalid symbol redefinition");
+  
+  Out.EmitTBSSSymbol(Ctx.getMachOSection("__DATA", "__thread_bss",
+                                        MCSectionMachO::S_THREAD_LOCAL_ZEROFILL,
+                                        0, SectionKind::getThreadBSS()),
+                     Sym, Size, 1 << Pow2Alignment);
+  
+  return false;
+}
+
 /// ParseDirectiveDarwinSubsectionsViaSymbols
 ///  ::= .subsections_via_symbols
 bool AsmParser::ParseDirectiveDarwinSubsectionsViaSymbols() {
diff --git a/lib/MC/MCSection.cpp b/lib/MC/MCSection.cpp
index f6e9636..a792d56 100644
--- a/lib/MC/MCSection.cpp
+++ b/lib/MC/MCSection.cpp
@@ -20,30 +20,3 @@ using namespace llvm;
 MCSection::~MCSection() {
 }
 
-//===----------------------------------------------------------------------===//
-// MCSectionCOFF
-//===----------------------------------------------------------------------===//
-
-MCSectionCOFF *MCSectionCOFF::
-Create(StringRef Name, bool IsDirective, SectionKind K, MCContext &Ctx) {
-  char *NameCopy = static_cast<char*>(
-    Ctx.Allocate(Name.size(), /*Alignment=*/1));
-  memcpy(NameCopy, Name.data(), Name.size());
-  return new (Ctx) MCSectionCOFF(StringRef(NameCopy, Name.size()),
-                                 IsDirective, K);
-}
-
-void MCSectionCOFF::PrintSwitchToSection(const MCAsmInfo &MAI,
-                                         raw_ostream &OS) const {
-  
-  if (isDirective()) {
-    OS << getName() << '\n';
-    return;
-  }
-  OS << "\t.section\t" << getName() << ",\"";
-  if (getKind().isText())
-    OS << 'x';
-  if (getKind().isWriteable())
-    OS << 'w';
-  OS << "\"\n";
-}
diff --git a/lib/MC/MCSectionCOFF.cpp b/lib/MC/MCSectionCOFF.cpp
new file mode 100644
index 0000000..d57bb0c
--- /dev/null
+++ b/lib/MC/MCSectionCOFF.cpp
@@ -0,0 +1,76 @@
+//===- lib/MC/MCSectionCOFF.cpp - COFF Code Section Representation --------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCSectionCOFF.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+MCSectionCOFF::~MCSectionCOFF() {} // anchor.
+
+// ShouldOmitSectionDirective - Decides whether a '.section' directive
+// should be printed before the section name
+bool MCSectionCOFF::ShouldOmitSectionDirective(StringRef Name,
+                                               const MCAsmInfo &MAI) const {
+  
+  // FIXME: Does .section .bss/.data/.text work everywhere??
+  if (Name == ".text" || Name == ".data" || Name == ".bss")
+    return true;
+
+  return false;
+}
+
+void MCSectionCOFF::PrintSwitchToSection(const MCAsmInfo &MAI,
+                                         raw_ostream &OS) const {
+  
+  // standard sections don't require the '.section'
+  if (ShouldOmitSectionDirective(SectionName, MAI)) {
+    OS << '\t' << getSectionName() << '\n';
+    return;
+  }
+
+  OS << "\t.section\t" << getSectionName() << ",\"";
+  if (getKind().isText())
+    OS << 'x';
+  if (getKind().isWriteable())
+    OS << 'w';
+  else
+    OS << 'r';
+  if (getCharacteristics() & MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE)
+    OS << 'n';
+  OS << "\"\n";
+  
+  if (getCharacteristics() & MCSectionCOFF::IMAGE_SCN_LNK_COMDAT) {
+    switch (Selection) {
+      case IMAGE_COMDAT_SELECT_NODUPLICATES:
+        OS << "\t.linkonce one_only\n";
+        break;
+      case IMAGE_COMDAT_SELECT_ANY:
+        OS << "\t.linkonce discard\n";
+        break;
+      case IMAGE_COMDAT_SELECT_SAME_SIZE:
+        OS << "\t.linkonce same_size\n";
+        break;
+      case IMAGE_COMDAT_SELECT_EXACT_MATCH:
+        OS << "\t.linkonce same_contents\n";
+        break;
+    //NOTE: as of binutils 2.20, there is no way to specifiy select largest
+    //      with the .linkonce directive. For now, we treat it as an invalid
+    //      comdat selection value.
+      case IMAGE_COMDAT_SELECT_LARGEST:
+    //  OS << "\t.linkonce largest\n";
+    //  break;
+      default:
+        assert (0 && "unsupported COFF selection type");
+        break;
+    }
+  }
+}
diff --git a/lib/MC/MCSectionMachO.cpp b/lib/MC/MCSectionMachO.cpp
index 3a18cee..ded3b20 100644
--- a/lib/MC/MCSectionMachO.cpp
+++ b/lib/MC/MCSectionMachO.cpp
@@ -34,7 +34,14 @@ static const struct {
   { "interposing",              "S_INTERPOSING" },                // 0x0D
   { "16byte_literals",          "S_16BYTE_LITERALS" },            // 0x0E
   { 0, /*FIXME??*/              "S_DTRACE_DOF" },                 // 0x0F
-  { 0, /*FIXME??*/              "S_LAZY_DYLIB_SYMBOL_POINTERS" }  // 0x10
+  { 0, /*FIXME??*/              "S_LAZY_DYLIB_SYMBOL_POINTERS" }, // 0x10
+  { "thread_local_regular",     "S_THREAD_LOCAL_REGULAR" },       // 0x11
+  { "thread_local_zerofill",    "S_THREAD_LOCAL_ZEROFILL" },      // 0x12
+  { "thread_local_variables",   "S_THREAD_LOCAL_VARIABLES" },     // 0x13
+  { "thread_local_variable_pointers",
+    "S_THREAD_LOCAL_VARIABLE_POINTERS" },                         // 0x14
+  { "thread_local_init_function_pointers",
+    "S_THREAD_LOCAL_INIT_FUNCTION_POINTERS"},                     // 0x15
 };
 
 
@@ -66,7 +73,7 @@ ENTRY(0 /*FIXME*/,           S_ATTR_LOC_RELOC)
 
 MCSectionMachO::MCSectionMachO(StringRef Segment, StringRef Section,
                                unsigned TAA, unsigned reserved2, SectionKind K)
-  : MCSection(K), TypeAndAttributes(TAA), Reserved2(reserved2) {
+  : MCSection(SV_MachO, K), TypeAndAttributes(TAA), Reserved2(reserved2) {
   assert(Segment.size() <= 16 && Section.size() <= 16 &&
          "Segment or section string too long");
   for (unsigned i = 0; i != 16; ++i) {
diff --git a/lib/MC/MCStreamer.cpp b/lib/MC/MCStreamer.cpp
index 4f484a2..573f2a3 100644
--- a/lib/MC/MCStreamer.cpp
+++ b/lib/MC/MCStreamer.cpp
@@ -48,7 +48,7 @@ void MCStreamer::EmitFill(uint64_t NumBytes, uint8_t FillValue,
     EmitValue(E, 1, AddrSpace);
 }
 
-/// EmitRawText - If this file is backed by a assembly streamer, this dumps
+/// EmitRawText - If this file is backed by an assembly streamer, this dumps
 /// the specified string in the output .s file.  This capability is
 /// indicated by the hasRawTextSupport() predicate.
 void MCStreamer::EmitRawText(StringRef String) {
diff --git a/lib/MC/MCSymbol.cpp b/lib/MC/MCSymbol.cpp
index 3fb1233..07751f7 100644
--- a/lib/MC/MCSymbol.cpp
+++ b/lib/MC/MCSymbol.cpp
@@ -8,6 +8,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCExpr.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 using namespace llvm;
@@ -38,6 +39,17 @@ static bool NameNeedsQuoting(StringRef Str) {
   return false;
 }
 
+void MCSymbol::setVariableValue(const MCExpr *Value) {
+  assert(Value && "Invalid variable value!");
+  assert((isUndefined() || (isAbsolute() && isa<MCConstantExpr>(Value))) &&
+         "Invalid redefinition!");
+  this->Value = Value;
+
+  // Mark the variable as absolute as appropriate.
+  if (isa<MCConstantExpr>(Value))
+    setAbsolute();
+}
+
 void MCSymbol::print(raw_ostream &OS) const {
   // The name for this MCSymbol is required to be a valid target name.  However,
   // some targets support quoting names with funny characters.  If the name
diff --git a/lib/MC/MachObjectWriter.cpp b/lib/MC/MachObjectWriter.cpp
index a533ccf..3207e99 100644
--- a/lib/MC/MachObjectWriter.cpp
+++ b/lib/MC/MachObjectWriter.cpp
@@ -16,6 +16,7 @@
 #include "llvm/MC/MCObjectWriter.h"
 #include "llvm/MC/MCSectionMachO.h"
 #include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCMachOSymbolFlags.h"
 #include "llvm/MC/MCValue.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MachO.h"
@@ -58,6 +59,20 @@ static bool isFixupKindRIPRel(unsigned Kind) {
     Kind == X86::reloc_riprel_4byte_movq_load;
 }
 
+static bool doesSymbolRequireExternRelocation(MCSymbolData *SD) {
+  // Undefined symbols are always extern.
+  if (SD->Symbol->isUndefined())
+    return true;
+
+  // References to weak definitions require external relocation entries; the
+  // definition may not always be the one in the same object file.
+  if (SD->getFlags() & SF_WeakDefinition)
+    return true;
+
+  // Otherwise, we can use an internal relocation.
+  return false;
+}
+
 namespace {
 
 class MachObjectWriterImpl {
@@ -130,7 +145,8 @@ class MachObjectWriterImpl {
     RIT_Pair                = 1,
     RIT_Difference          = 2,
     RIT_PreboundLazyPointer = 3,
-    RIT_LocalDifference     = 4
+    RIT_LocalDifference     = 4,
+    RIT_TLV                 = 5
   };
 
   /// X86_64 uses its own relocation types.
@@ -143,7 +159,8 @@ class MachObjectWriterImpl {
     RIT_X86_64_Subtractor = 5,
     RIT_X86_64_Signed1    = 6,
     RIT_X86_64_Signed2    = 7,
-    RIT_X86_64_Signed4    = 8
+    RIT_X86_64_Signed4    = 8,
+    RIT_X86_64_TLV        = 9
   };
 
   /// MachSymbolData - Helper struct for containing some precomputed information
@@ -155,8 +172,8 @@ class MachObjectWriterImpl {
 
     // Support lexicographic sorting.
     bool operator<(const MachSymbolData &RHS) const {
-      const std::string &Name = SymbolData->getSymbol().getName();
-      return Name < RHS.SymbolData->getSymbol().getName();
+      return SymbolData->getSymbol().getName() <
+             RHS.SymbolData->getSymbol().getName();
     }
   };
 
@@ -170,6 +187,7 @@ class MachObjectWriterImpl {
 
   llvm::DenseMap<const MCSectionData*,
                  std::vector<MachRelocationEntry> > Relocations;
+  llvm::DenseMap<const MCSectionData*, unsigned> IndirectSymBase;
 
   /// @}
   /// @name Symbol Table Data
@@ -289,9 +307,7 @@ public:
     uint64_t Start = OS.tell();
     (void) Start;
 
-    // FIXME: cast<> support!
-    const MCSectionMachO &Section =
-      static_cast<const MCSectionMachO&>(SD.getSection());
+    const MCSectionMachO &Section = cast<MCSectionMachO>(SD.getSection());
     WriteBytes(Section.getSectionName(), 16);
     WriteBytes(Section.getSegmentName(), 16);
     if (Is64Bit) {
@@ -312,7 +328,7 @@ public:
     Write32(NumRelocations ? RelocationsStart : 0);
     Write32(NumRelocations);
     Write32(Flags);
-    Write32(0); // reserved1
+    Write32(IndirectSymBase.lookup(&SD)); // reserved1
     Write32(Section.getStubSize()); // reserved2
     if (Is64Bit)
       Write32(0); // reserved3
@@ -404,7 +420,7 @@ public:
     // Compute the symbol address.
     if (Symbol.isDefined()) {
       if (Symbol.isAbsolute()) {
-        llvm_unreachable("FIXME: Not yet implemented!");
+        Address = cast<MCConstantExpr>(Symbol.getVariableValue())->getValue();
       } else {
         Address = Layout.getSymbolAddress(&Data);
       }
@@ -456,14 +472,17 @@ public:
 
   void RecordX86_64Relocation(const MCAssembler &Asm, const MCAsmLayout &Layout,
                               const MCFragment *Fragment,
-                              const MCAsmFixup &Fixup, MCValue Target,
+                              const MCFixup &Fixup, MCValue Target,
                               uint64_t &FixedValue) {
-    unsigned IsPCRel = isFixupKindPCRel(Fixup.Kind);
-    unsigned IsRIPRel = isFixupKindRIPRel(Fixup.Kind);
-    unsigned Log2Size = getFixupKindLog2Size(Fixup.Kind);
+    unsigned IsPCRel = isFixupKindPCRel(Fixup.getKind());
+    unsigned IsRIPRel = isFixupKindRIPRel(Fixup.getKind());
+    unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind());
 
     // See <reloc.h>.
-    uint32_t Address = Layout.getFragmentOffset(Fragment) + Fixup.Offset;
+    uint32_t FixupOffset =
+      Layout.getFragmentOffset(Fragment) + Fixup.getOffset();
+    uint32_t FixupAddress =
+      Layout.getFragmentAddress(Fragment) + Fixup.getOffset();
     int64_t Value = 0;
     unsigned Index = 0;
     unsigned IsExtern = 0;
@@ -532,7 +551,7 @@ public:
       Type = RIT_X86_64_Unsigned;
 
       MachRelocationEntry MRE;
-      MRE.Word0 = Address;
+      MRE.Word0 = FixupOffset;
       MRE.Word1 = ((Index     <<  0) |
                    (IsPCRel   << 24) |
                    (Log2Size  << 25) |
@@ -548,6 +567,17 @@ public:
       MCSymbolData &SD = Asm.getSymbolData(*Symbol);
       const MCSymbolData *Base = Asm.getAtom(Layout, &SD);
 
+      // Relocations inside debug sections always use local relocations when
+      // possible. This seems to be done because the debugger doesn't fully
+      // understand x86_64 relocation entries, and expects to find values that
+      // have already been fixed up.
+      if (Symbol->isInSection()) {
+        const MCSectionMachO &Section = static_cast<const MCSectionMachO&>(
+          Fragment->getParent()->getSection());
+        if (Section.hasAttribute(MCSectionMachO::S_ATTR_DEBUG))
+          Base = 0;
+      }
+
       // x86_64 almost always uses external relocations, except when there is no
       // symbol to use as a base address (a local symbol with no preceeding
       // non-local symbol).
@@ -558,14 +588,17 @@ public:
         // Add the local offset, if needed.
         if (Base != &SD)
           Value += Layout.getSymbolAddress(&SD) - Layout.getSymbolAddress(Base);
-      } else {
+      } else if (Symbol->isInSection()) {
         // The index is the section ordinal (1-based).
         Index = SD.getFragment()->getParent()->getOrdinal() + 1;
         IsExtern = 0;
         Value += Layout.getSymbolAddress(&SD);
 
         if (IsPCRel)
-          Value -= Address + (1 << Log2Size);
+          Value -= FixupAddress + (1 << Log2Size);
+      } else {
+        report_fatal_error("unsupported relocation of undefined symbol '" +
+                           Symbol->getName() + "'");
       }
 
       MCSymbolRefExpr::VariantKind Modifier = Target.getSymA()->getKind();
@@ -575,14 +608,37 @@ public:
             // x86_64 distinguishes movq foo@GOTPCREL so that the linker can
             // rewrite the movq to an leaq at link time if the symbol ends up in
             // the same linkage unit.
-            if (unsigned(Fixup.Kind) == X86::reloc_riprel_4byte_movq_load)
+            if (unsigned(Fixup.getKind()) == X86::reloc_riprel_4byte_movq_load)
               Type = RIT_X86_64_GOTLoad;
             else
               Type = RIT_X86_64_GOT;
-          } else if (Modifier != MCSymbolRefExpr::VK_None)
+          }  else if (Modifier == MCSymbolRefExpr::VK_TLVP) {
+            Type = RIT_X86_64_TLV;
+          }  else if (Modifier != MCSymbolRefExpr::VK_None) {
             report_fatal_error("unsupported symbol modifier in relocation");
-          else
+          } else {
             Type = RIT_X86_64_Signed;
+
+            // The Darwin x86_64 relocation format has a problem where it cannot
+            // encode an address (L<foo> + <constant>) which is outside the atom
+            // containing L<foo>. Generally, this shouldn't occur but it does
+            // happen when we have a RIPrel instruction with data following the
+            // relocation entry (e.g., movb $012, L0(%rip)). Even with the PCrel
+            // adjustment Darwin x86_64 uses, the offset is still negative and
+            // the linker has no way to recognize this.
+            //
+            // To work around this, Darwin uses several special relocation types
+            // to indicate the offsets. However, the specification or
+            // implementation of these seems to also be incomplete; they should
+            // adjust the addend as well based on the actual encoded instruction
+            // (the additional bias), but instead appear to just look at the
+            // final offset.
+            switch (-(Target.getConstant() + (1LL << Log2Size))) {
+            case 1: Type = RIT_X86_64_Signed1; break;
+            case 2: Type = RIT_X86_64_Signed2; break;
+            case 4: Type = RIT_X86_64_Signed4; break;
+            }
+          }
         } else {
           if (Modifier != MCSymbolRefExpr::VK_None)
             report_fatal_error("unsupported symbol modifier in branch "
@@ -590,27 +646,6 @@ public:
 
           Type = RIT_X86_64_Branch;
         }
-
-        // The Darwin x86_64 relocation format has a problem where it cannot
-        // encode an address (L<foo> + <constant>) which is outside the atom
-        // containing L<foo>. Generally, this shouldn't occur but it does happen
-        // when we have a RIPrel instruction with data following the relocation
-        // entry (e.g., movb $012, L0(%rip)). Even with the PCrel adjustment
-        // Darwin x86_64 uses, the offset is still negative and the linker has
-        // no way to recognize this.
-        //
-        // To work around this, Darwin uses several special relocation types to
-        // indicate the offsets. However, the specification or implementation of
-        // these seems to also be incomplete; they should adjust the addend as
-        // well based on the actual encoded instruction (the additional bias),
-        // but instead appear to just look at the final offset.
-        if (IsRIPRel) {
-          switch (-(Target.getConstant() + (1LL << Log2Size))) {
-          case 1: Type = RIT_X86_64_Signed1; break;
-          case 2: Type = RIT_X86_64_Signed2; break;
-          case 4: Type = RIT_X86_64_Signed4; break;
-          }
-        }
       } else {
         if (Modifier == MCSymbolRefExpr::VK_GOT) {
           Type = RIT_X86_64_GOT;
@@ -621,6 +656,8 @@ public:
           // required to include any necessary offset directly.
           Type = RIT_X86_64_GOT;
           IsPCRel = 1;
+        } else if (Modifier == MCSymbolRefExpr::VK_TLVP) {
+          report_fatal_error("TLVP symbol modifier should have been rip-rel");
         } else if (Modifier != MCSymbolRefExpr::VK_None)
           report_fatal_error("unsupported symbol modifier in relocation");
         else
@@ -633,7 +670,7 @@ public:
 
     // struct relocation_info (8 bytes)
     MachRelocationEntry MRE;
-    MRE.Word0 = Address;
+    MRE.Word0 = FixupOffset;
     MRE.Word1 = ((Index     <<  0) |
                  (IsPCRel   << 24) |
                  (Log2Size  << 25) |
@@ -645,11 +682,11 @@ public:
   void RecordScatteredRelocation(const MCAssembler &Asm,
                                  const MCAsmLayout &Layout,
                                  const MCFragment *Fragment,
-                                 const MCAsmFixup &Fixup, MCValue Target,
+                                 const MCFixup &Fixup, MCValue Target,
                                  uint64_t &FixedValue) {
-    uint32_t Address = Layout.getFragmentOffset(Fragment) + Fixup.Offset;
-    unsigned IsPCRel = isFixupKindPCRel(Fixup.Kind);
-    unsigned Log2Size = getFixupKindLog2Size(Fixup.Kind);
+    uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
+    unsigned IsPCRel = isFixupKindPCRel(Fixup.getKind());
+    unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind());
     unsigned Type = RIT_Vanilla;
 
     // See <reloc.h>.
@@ -692,40 +729,49 @@ public:
     }
 
     MachRelocationEntry MRE;
-    MRE.Word0 = ((Address   <<  0) |
-                 (Type      << 24) |
-                 (Log2Size  << 28) |
-                 (IsPCRel   << 30) |
+    MRE.Word0 = ((FixupOffset <<  0) |
+                 (Type        << 24) |
+                 (Log2Size    << 28) |
+                 (IsPCRel     << 30) |
                  RF_Scattered);
     MRE.Word1 = Value;
     Relocations[Fragment->getParent()].push_back(MRE);
   }
 
   void RecordRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout,
-                        const MCFragment *Fragment, const MCAsmFixup &Fixup,
+                        const MCFragment *Fragment, const MCFixup &Fixup,
                         MCValue Target, uint64_t &FixedValue) {
     if (Is64Bit) {
       RecordX86_64Relocation(Asm, Layout, Fragment, Fixup, Target, FixedValue);
       return;
     }
 
-    unsigned IsPCRel = isFixupKindPCRel(Fixup.Kind);
-    unsigned Log2Size = getFixupKindLog2Size(Fixup.Kind);
+    unsigned IsPCRel = isFixupKindPCRel(Fixup.getKind());
+    unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind());
 
     // If this is a difference or a defined symbol plus an offset, then we need
     // a scattered relocation entry.
+    // Differences always require scattered relocations.
+    if (Target.getSymB())
+        return RecordScatteredRelocation(Asm, Layout, Fragment, Fixup,
+                                         Target, FixedValue);
+
+    // Get the symbol data, if any.
+    MCSymbolData *SD = 0;
+    if (Target.getSymA())
+      SD = &Asm.getSymbolData(Target.getSymA()->getSymbol());
+
+    // If this is an internal relocation with an offset, it also needs a
+    // scattered relocation entry.
     uint32_t Offset = Target.getConstant();
     if (IsPCRel)
       Offset += 1 << Log2Size;
-    if (Target.getSymB() ||
-        (Target.getSymA() && !Target.getSymA()->getSymbol().isUndefined() &&
-         Offset)) {
-      RecordScatteredRelocation(Asm, Layout, Fragment, Fixup,Target,FixedValue);
-      return;
-    }
+    if (Offset && SD && !doesSymbolRequireExternRelocation(SD))
+      return RecordScatteredRelocation(Asm, Layout, Fragment, Fixup,
+                                       Target, FixedValue);
 
     // See <reloc.h>.
-    uint32_t Address = Layout.getFragmentOffset(Fragment) + Fixup.Offset;
+    uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
     uint32_t Value = 0;
     unsigned Index = 0;
     unsigned IsExtern = 0;
@@ -739,12 +785,15 @@ public:
       Type = RIT_Vanilla;
       Value = 0;
     } else {
-      const MCSymbol *Symbol = &Target.getSymA()->getSymbol();
-      MCSymbolData *SD = &Asm.getSymbolData(*Symbol);
-
-      if (Symbol->isUndefined()) {
+      // Check whether we need an external or internal relocation.
+      if (doesSymbolRequireExternRelocation(SD)) {
         IsExtern = 1;
         Index = SD->getIndex();
+        // For external relocations, make sure to offset the fixup value to
+        // compensate for the addend of the symbol address, if it was
+        // undefined. This occurs with weak definitions, for example.
+        if (!SD->Symbol->isUndefined())
+          FixedValue -= Layout.getSymbolAddress(SD);
         Value = 0;
       } else {
         // The index is the section ordinal (1-based).
@@ -757,7 +806,7 @@ public:
 
     // struct relocation_info (8 bytes)
     MachRelocationEntry MRE;
-    MRE.Word0 = Address;
+    MRE.Word0 = FixupOffset;
     MRE.Word1 = ((Index     <<  0) |
                  (IsPCRel   << 24) |
                  (Log2Size  << 25) |
@@ -775,29 +824,37 @@ public:
     // FIXME: Revisit this when the dust settles.
 
     // Bind non lazy symbol pointers first.
+    unsigned IndirectIndex = 0;
     for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(),
-           ie = Asm.indirect_symbol_end(); it != ie; ++it) {
-      // FIXME: cast<> support!
+           ie = Asm.indirect_symbol_end(); it != ie; ++it, ++IndirectIndex) {
       const MCSectionMachO &Section =
-        static_cast<const MCSectionMachO&>(it->SectionData->getSection());
+        cast<MCSectionMachO>(it->SectionData->getSection());
 
       if (Section.getType() != MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS)
         continue;
 
+      // Initialize the section indirect symbol base, if necessary.
+      if (!IndirectSymBase.count(it->SectionData))
+        IndirectSymBase[it->SectionData] = IndirectIndex;
+      
       Asm.getOrCreateSymbolData(*it->Symbol);
     }
 
     // Then lazy symbol pointers and symbol stubs.
+    IndirectIndex = 0;
     for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(),
-           ie = Asm.indirect_symbol_end(); it != ie; ++it) {
-      // FIXME: cast<> support!
+           ie = Asm.indirect_symbol_end(); it != ie; ++it, ++IndirectIndex) {
       const MCSectionMachO &Section =
-        static_cast<const MCSectionMachO&>(it->SectionData->getSection());
+        cast<MCSectionMachO>(it->SectionData->getSection());
 
       if (Section.getType() != MCSectionMachO::S_LAZY_SYMBOL_POINTERS &&
           Section.getType() != MCSectionMachO::S_SYMBOL_STUBS)
         continue;
 
+      // Initialize the section indirect symbol base, if necessary.
+      if (!IndirectSymBase.count(it->SectionData))
+        IndirectSymBase[it->SectionData] = IndirectIndex;
+
       // Set the symbol type to undefined lazy, but only on construction.
       //
       // FIXME: Do not hardcode.
@@ -1111,7 +1168,7 @@ void MachObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm) {
 void MachObjectWriter::RecordRelocation(const MCAssembler &Asm,
                                         const MCAsmLayout &Layout,
                                         const MCFragment *Fragment,
-                                        const MCAsmFixup &Fixup, MCValue Target,
+                                        const MCFixup &Fixup, MCValue Target,
                                         uint64_t &FixedValue) {
   ((MachObjectWriterImpl*) Impl)->RecordRelocation(Asm, Layout, Fragment, Fixup,
                                                    Target, FixedValue);
diff --git a/lib/Support/APInt.cpp b/lib/Support/APInt.cpp
index 50025d2..1341d21 100644
--- a/lib/Support/APInt.cpp
+++ b/lib/Support/APInt.cpp
@@ -1382,13 +1382,12 @@ APInt APInt::sqrt() const {
   // libc sqrt function which will probably use a hardware sqrt computation.
   // This should be faster than the algorithm below.
   if (magnitude < 52) {
-#if defined( _MSC_VER ) || defined(_MINIX)
-    // Amazingly, VC++ and Minix don't have round().
+#if HAVE_ROUND
     return APInt(BitWidth,
-                 uint64_t(::sqrt(double(isSingleWord()?VAL:pVal[0]))) + 0.5);
+                 uint64_t(::round(::sqrt(double(isSingleWord()?VAL:pVal[0])))));
 #else
     return APInt(BitWidth,
-                 uint64_t(::round(::sqrt(double(isSingleWord()?VAL:pVal[0])))));
+                 uint64_t(::sqrt(double(isSingleWord()?VAL:pVal[0]))) + 0.5);
 #endif
   }
 
diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp
index d31f34e..ae66110 100644
--- a/lib/Support/CommandLine.cpp
+++ b/lib/Support/CommandLine.cpp
@@ -1170,7 +1170,9 @@ public:
     std::string CPU = sys::getHostCPUName();
     if (CPU == "generic") CPU = "(unknown)";
     OS << ".\n"
+#if (ENABLE_TIMESTAMPS == 1)
        << "  Built " << __DATE__ << " (" << __TIME__ << ").\n"
+#endif
        << "  Host: " << sys::getHostTriple() << '\n'
        << "  Host CPU: " << CPU << '\n'
        << '\n'
diff --git a/lib/Support/ErrorHandling.cpp b/lib/Support/ErrorHandling.cpp
index 56a171c..7e7ca9d 100644
--- a/lib/Support/ErrorHandling.cpp
+++ b/lib/Support/ErrorHandling.cpp
@@ -16,6 +16,7 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/System/Signals.h"
 #include "llvm/System/Threading.h"
 #include <cassert>
 #include <cstdlib>
@@ -52,6 +53,12 @@ void llvm::report_fatal_error(const Twine &reason) {
   } else {
     ErrorHandler(ErrorHandlerUserData, reason.str());
   }
+
+  // If we reached here, we are failing ungracefully. Run the interrupt handlers
+  // to make sure any special cleanups get done, in particular that we remove
+  // files registered with RemoveFileOnSignal.
+  sys::RunInterruptHandlers();
+
   exit(1);
 }
 
diff --git a/lib/Support/PrettyStackTrace.cpp b/lib/Support/PrettyStackTrace.cpp
index 68b41a7..7a04a53 100644
--- a/lib/Support/PrettyStackTrace.cpp
+++ b/lib/Support/PrettyStackTrace.cpp
@@ -50,8 +50,8 @@ static void PrintCurStackTrace(raw_ostream &OS) {
 
 // Integrate with crash reporter.
 #ifdef __APPLE__
-extern "C" const char *__crashreporter_info__;
-const char *__crashreporter_info__ = 0;
+static const char *__crashreporter_info__ = 0;
+asm(".desc ___crashreporter_info__, 0x10");
 #endif
 
 
diff --git a/lib/Support/StringRef.cpp b/lib/Support/StringRef.cpp
index 2b262dc..ca0f518 100644
--- a/lib/Support/StringRef.cpp
+++ b/lib/Support/StringRef.cpp
@@ -23,6 +23,10 @@ static char ascii_tolower(char x) {
   return x;
 }
 
+static bool ascii_isdigit(char x) {
+  return x >= '0' && x <= '9';
+}
+
 /// compare_lower - Compare strings, ignoring case.
 int StringRef::compare_lower(StringRef RHS) const {
   for (size_t I = 0, E = min(Length, RHS.Length); I != E; ++I) {
@@ -37,6 +41,30 @@ int StringRef::compare_lower(StringRef RHS) const {
   return Length < RHS.Length ? -1 : 1;
 }
 
+/// compare_numeric - Compare strings, handle embedded numbers.
+int StringRef::compare_numeric(StringRef RHS) const {
+  for (size_t I = 0, E = min(Length, RHS.Length); I != E; ++I) {
+    if (Data[I] == RHS.Data[I])
+      continue;
+    if (ascii_isdigit(Data[I]) && ascii_isdigit(RHS.Data[I])) {
+      // The longer sequence of numbers is larger. This doesn't really handle
+      // prefixed zeros well.
+      for (size_t J = I+1; J != E+1; ++J) {
+        bool ld = J < Length && ascii_isdigit(Data[J]);
+        bool rd = J < RHS.Length && ascii_isdigit(RHS.Data[J]);
+        if (ld != rd)
+          return rd ? -1 : 1;
+        if (!rd)
+          break;
+      }
+    }
+    return Data[I] < RHS.Data[I] ? -1 : 1;
+  }
+  if (Length == RHS.Length)
+        return 0;
+  return Length < RHS.Length ? -1 : 1;
+}
+
 // Compute the edit distance between the two given strings.
 unsigned StringRef::edit_distance(llvm::StringRef Other, 
                                   bool AllowReplacements) {
diff --git a/lib/Support/Timer.cpp b/lib/Support/Timer.cpp
index 481f6ba..784b77c 100644
--- a/lib/Support/Timer.cpp
+++ b/lib/Support/Timer.cpp
@@ -61,6 +61,10 @@ raw_ostream *llvm::CreateInfoOutputFile() {
   if (OutputFilename == "-")
     return new raw_fd_ostream(1, false); // stdout.
   
+  // Append mode is used because the info output file is opened and closed
+  // each time -stats or -time-passes wants to print output to it. To
+  // compensate for this, the test-suite Makefiles have code to delete the
+  // info output file before running commands which write to it.
   std::string Error;
   raw_ostream *Result = new raw_fd_ostream(OutputFilename.c_str(),
                                            Error, raw_fd_ostream::F_Append);
diff --git a/lib/Support/Twine.cpp b/lib/Support/Twine.cpp
index 21504e9..b3ea013 100644
--- a/lib/Support/Twine.cpp
+++ b/lib/Support/Twine.cpp
@@ -48,10 +48,10 @@ void Twine::printOneChild(raw_ostream &OS, const void *Ptr,
     OS << *static_cast<const StringRef*>(Ptr); 
     break;
   case Twine::DecUIKind:
-    OS << *static_cast<const unsigned int*>(Ptr);
+    OS << (unsigned)(uintptr_t)Ptr;
     break;
   case Twine::DecIKind:
-    OS << *static_cast<const int*>(Ptr);
+    OS << (int)(intptr_t)Ptr;
     break;
   case Twine::DecULKind:
     OS << *static_cast<const unsigned long*>(Ptr);
@@ -95,10 +95,10 @@ void Twine::printOneChildRepr(raw_ostream &OS, const void *Ptr,
        << static_cast<const StringRef*>(Ptr) << "\"";
     break;
   case Twine::DecUIKind:
-    OS << "decUI:\"" << *static_cast<const unsigned int*>(Ptr) << "\"";
+    OS << "decUI:\"" << (unsigned)(uintptr_t)Ptr << "\"";
     break;
   case Twine::DecIKind:
-    OS << "decI:\"" << *static_cast<const int*>(Ptr) << "\"";
+    OS << "decI:\"" << (int)(intptr_t)Ptr << "\"";
     break;
   case Twine::DecULKind:
     OS << "decUL:\"" << *static_cast<const unsigned long*>(Ptr) << "\"";
diff --git a/lib/Support/raw_ostream.cpp b/lib/Support/raw_ostream.cpp
index 0b05c54..11cf0ec 100644
--- a/lib/Support/raw_ostream.cpp
+++ b/lib/Support/raw_ostream.cpp
@@ -21,6 +21,7 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/ADT/STLExtras.h"
 #include <cctype>
+#include <cerrno>
 #include <sys/stat.h>
 #include <sys/types.h>
 
@@ -399,37 +400,76 @@ raw_fd_ostream::raw_fd_ostream(const char *Filename, std::string &ErrorInfo,
   if (Flags & F_Excl)
     OpenFlags |= O_EXCL;
 
-  FD = open(Filename, OpenFlags, 0664);
-  if (FD < 0) {
-    ErrorInfo = "Error opening output file '" + std::string(Filename) + "'";
-    ShouldClose = false;
-  } else {
-    ShouldClose = true;
+  while ((FD = open(Filename, OpenFlags, 0664)) < 0) {
+    if (errno != EINTR) {
+      ErrorInfo = "Error opening output file '" + std::string(Filename) + "'";
+      ShouldClose = false;
+      return;
+    }
   }
+
+  // Ok, we successfully opened the file, so it'll need to be closed.
+  ShouldClose = true;
 }
 
 raw_fd_ostream::~raw_fd_ostream() {
   if (FD < 0) return;
   flush();
   if (ShouldClose)
-    if (::close(FD) != 0)
-      error_detected();
+    while (::close(FD) != 0)
+      if (errno != EINTR) {
+        error_detected();
+        break;
+      }
 }
 
 
 void raw_fd_ostream::write_impl(const char *Ptr, size_t Size) {
   assert(FD >= 0 && "File already closed.");
   pos += Size;
-  if (::write(FD, Ptr, Size) != (ssize_t) Size)
-    error_detected();
+  ssize_t ret;
+
+  do {
+    ret = ::write(FD, Ptr, Size);
+
+    if (ret < 0) {
+      // If it's a recoverable error, swallow it and retry the write.
+      //
+      // Ideally we wouldn't ever see EAGAIN or EWOULDBLOCK here, since
+      // raw_ostream isn't designed to do non-blocking I/O. However, some
+      // programs, such as old versions of bjam, have mistakenly used
+      // O_NONBLOCK. For compatibility, emulate blocking semantics by
+      // spinning until the write succeeds. If you don't want spinning,
+      // don't use O_NONBLOCK file descriptors with raw_ostream.
+      if (errno == EINTR || errno == EAGAIN
+#ifdef EWOULDBLOCK
+          || errno == EWOULDBLOCK
+#endif
+          )
+        continue;
+
+      // Otherwise it's a non-recoverable error. Note it and quit.
+      error_detected();
+      break;
+    }
+
+    // The write may have written some or all of the data. Update the
+    // size and buffer pointer to reflect the remainder that needs
+    // to be written. If there are no bytes left, we're done.
+    Ptr += ret;
+    Size -= ret;
+  } while (Size > 0);
 }
 
 void raw_fd_ostream::close() {
   assert(ShouldClose);
   ShouldClose = false;
   flush();
-  if (::close(FD) != 0)
-    error_detected();
+  while (::close(FD) != 0)
+    if (errno != EINTR) {
+      error_detected();
+      break;
+    }
   FD = -1;
 }
 
diff --git a/lib/System/Unix/Signals.inc b/lib/System/Unix/Signals.inc
index 56bf9e7..9548816 100644
--- a/lib/System/Unix/Signals.inc
+++ b/lib/System/Unix/Signals.inc
@@ -152,7 +152,9 @@ static RETSIGTYPE SignalHandler(int Sig) {
     CallBacksToRun[i].first(CallBacksToRun[i].second);
 }
 
-
+void llvm::sys::RunInterruptHandlers() {
+  SignalHandler(SIGINT);
+}
 
 void llvm::sys::SetInterruptFunction(void (*IF)()) {
   SignalsMutex.acquire();
diff --git a/lib/System/Win32/Signals.inc b/lib/System/Win32/Signals.inc
index f2b72ca..a3a393c 100644
--- a/lib/System/Win32/Signals.inc
+++ b/lib/System/Win32/Signals.inc
@@ -189,6 +189,10 @@ static void Cleanup() {
   LeaveCriticalSection(&CriticalSection);
 }
 
+void llvm::sys::RunInterruptHandlers() {
+  Cleanup();
+}
+
 static LONG WINAPI LLVMUnhandledExceptionFilter(LPEXCEPTION_POINTERS ep) {
   try {
     Cleanup();
diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h
index b08f942..ae7ae59 100644
--- a/lib/Target/ARM/ARM.h
+++ b/lib/Target/ARM/ARM.h
@@ -48,7 +48,7 @@ namespace ARMCC {
     AL
   };
 
-  inline static CondCodes getOppositeCondition(CondCodes CC){
+  inline static CondCodes getOppositeCondition(CondCodes CC) {
     switch (CC) {
     default: llvm_unreachable("Unknown condition code");
     case EQ: return NE;
@@ -67,7 +67,7 @@ namespace ARMCC {
     case LE: return GT;
     }
   }
-}
+} // namespace ARMCC
 
 inline static const char *ARMCondCodeToString(ARMCC::CondCodes CC) {
   switch (CC) {
@@ -90,6 +90,10 @@ inline static const char *ARMCondCodeToString(ARMCC::CondCodes CC) {
   }
 }
 
+/// ModelWithRegSequence - Return true if isel should use REG_SEQUENCE to model
+/// operations involving sub-registers.
+bool ModelWithRegSequence();
+
 FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM,
                                CodeGenOpt::Level OptLevel);
 
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index b4dec0c..f1e6a9f 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -32,6 +32,8 @@ def ArchV6T2    : SubtargetFeature<"v6t2", "ARMArchVersion", "V6T2",
                                    "ARM v6t2">;
 def ArchV7A     : SubtargetFeature<"v7a", "ARMArchVersion", "V7A",
                                    "ARM v7A">;
+def ArchV7M     : SubtargetFeature<"v7m", "ARMArchVersion", "V7M",
+                                   "ARM v7M">;
 def FeatureVFP2 : SubtargetFeature<"vfp2", "ARMFPUType", "VFPv2",
                                    "Enable VFP2 instructions">;
 def FeatureVFP3 : SubtargetFeature<"vfp3", "ARMFPUType", "VFPv3",
@@ -42,6 +44,10 @@ def FeatureThumb2 : SubtargetFeature<"thumb2", "ThumbMode", "Thumb2",
                                      "Enable Thumb2 instructions">;
 def FeatureFP16   : SubtargetFeature<"fp16", "HasFP16", "true",
                                      "Enable half-precision floating point">;
+def FeatureHWDiv  : SubtargetFeature<"hwdiv", "HasHardwareDivide", "true",
+                                     "Enable divide instructions">;
+def FeatureT2ExtractPack: SubtargetFeature<"t2xtpk", "HasT2ExtractPack", "true",
+                                 "Enable Thumb2 extract and pack instructions">;
 
 // Some processors have multiply-accumulate instructions that don't
 // play nicely with other VFP instructions, and it's generally better
@@ -123,9 +129,11 @@ def : Processor<"arm1156t2f-s",    ARMV6Itineraries,
 // V7 Processors.
 def : Processor<"cortex-a8",        CortexA8Itineraries,
                 [ArchV7A, FeatureThumb2, FeatureNEON, FeatureHasSlowVMLx,
-                 FeatureNEONForFP]>;
+                 FeatureNEONForFP, FeatureT2ExtractPack]>;
 def : Processor<"cortex-a9",        CortexA9Itineraries,
-                [ArchV7A, FeatureThumb2, FeatureNEON]>;
+                [ArchV7A, FeatureThumb2, FeatureNEON, FeatureT2ExtractPack]>;
+def : ProcNoItin<"cortex-m3",       [ArchV7M, FeatureThumb2, FeatureHWDiv]>;
+def : ProcNoItin<"cortex-m4",       [ArchV7M, FeatureThumb2, FeatureHWDiv]>;
 
 //===----------------------------------------------------------------------===//
 // Register File Description
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index a193858..2528854 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -28,6 +28,7 @@
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineJumpTableInfo.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/PseudoSourceValue.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/Support/CommandLine.h"
@@ -196,6 +197,42 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
   return NewMIs[0];
 }
 
+bool
+ARMBaseInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+                                            MachineBasicBlock::iterator MI,
+                                            const std::vector<CalleeSavedInfo> &CSI,
+                                            const TargetRegisterInfo *TRI) const {
+  if (CSI.empty())
+    return false;
+
+  DebugLoc DL;
+  if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+    unsigned Reg = CSI[i].getReg();
+    bool isKill = true;
+
+    // Add the callee-saved register as live-in unless it's LR and
+    // @llvm.returnaddress is called. If LR is returned for @llvm.returnaddress
+    // then it's already added to the function and entry block live-in sets.
+    if (Reg == ARM::LR) {
+      MachineFunction &MF = *MBB.getParent();
+      if (MF.getFrameInfo()->isReturnAddressTaken() &&
+          MF.getRegInfo().isLiveIn(Reg))
+        isKill = false;
+    }
+
+    if (isKill)
+      MBB.addLiveIn(Reg);
+
+    // Insert the spill to the stack frame. The register is killed at the spill
+    // 
+    storeRegToStackSlot(MBB, MI, Reg, isKill,
+                        CSI[i].getFrameIdx(), CSI[i].getRegClass(), TRI);
+  }
+  return true;
+}
+
 // Branch analysis.
 bool
 ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
@@ -481,6 +518,10 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
       // If this machine instr is a constant pool entry, its size is recorded as
       // operand #2.
       return MI->getOperand(2).getImm();
+    case ARM::Int_eh_sjlj_longjmp:
+      return 16;
+    case ARM::tInt_eh_sjlj_longjmp:
+      return 10;
     case ARM::Int_eh_sjlj_setjmp:
     case ARM::Int_eh_sjlj_setjmp_nofp:
       return 24;
@@ -540,16 +581,17 @@ bool
 ARMBaseInstrInfo::isMoveInstr(const MachineInstr &MI,
                               unsigned &SrcReg, unsigned &DstReg,
                               unsigned& SrcSubIdx, unsigned& DstSubIdx) const {
-  SrcSubIdx = DstSubIdx = 0; // No sub-registers.
-
   switch (MI.getOpcode()) {
   default: break;
   case ARM::VMOVS:
   case ARM::VMOVD:
   case ARM::VMOVDneon:
-  case ARM::VMOVQ: {
+  case ARM::VMOVQ:
+  case ARM::VMOVQQ : {
     SrcReg = MI.getOperand(1).getReg();
     DstReg = MI.getOperand(0).getReg();
+    SrcSubIdx = MI.getOperand(1).getSubReg();
+    DstSubIdx = MI.getOperand(0).getSubReg();
     return true;
   }
   case ARM::MOVr:
@@ -564,6 +606,8 @@ ARMBaseInstrInfo::isMoveInstr(const MachineInstr &MI,
            "Invalid ARM MOV instruction");
     SrcReg = MI.getOperand(1).getReg();
     DstReg = MI.getOperand(0).getReg();
+    SrcSubIdx = MI.getOperand(1).getSubReg();
+    DstSubIdx = MI.getOperand(0).getSubReg();
     return true;
   }
   }
@@ -654,10 +698,8 @@ ARMBaseInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
                                MachineBasicBlock::iterator I,
                                unsigned DestReg, unsigned SrcReg,
                                const TargetRegisterClass *DestRC,
-                               const TargetRegisterClass *SrcRC) const {
-  DebugLoc DL;
-  if (I != MBB.end()) DL = I->getDebugLoc();
-
+                               const TargetRegisterClass *SrcRC,
+                               DebugLoc DL) const {
   // tGPR is used sometimes in ARM instructions that need to avoid using
   // certain registers.  Just treat it as GPR here.
   if (DestRC == ARM::tGPRRegisterClass)
@@ -679,6 +721,12 @@ ARMBaseInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
       SrcRC == ARM::QPR_8RegisterClass)
     SrcRC = ARM::QPRRegisterClass;
 
+  // Allow QQPR / QQPR_VFP2 cross-class copies.
+  if (DestRC == ARM::QQPR_VFP2RegisterClass)
+    DestRC = ARM::QQPRRegisterClass;
+  if (SrcRC == ARM::QQPR_VFP2RegisterClass)
+    SrcRC = ARM::QQPRRegisterClass;
+
   // Disallow copies of unequal sizes.
   if (DestRC != SrcRC && DestRC->getSize() != SrcRC->getSize())
     return false;
@@ -703,20 +751,36 @@ ARMBaseInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
       Opc = ARM::VMOVDneon;
     else if (DestRC == ARM::QPRRegisterClass)
       Opc = ARM::VMOVQ;
+    else if (DestRC == ARM::QQPRRegisterClass)
+      Opc = ARM::VMOVQQ;
+    else if (DestRC == ARM::QQQQPRRegisterClass)
+      Opc = ARM::VMOVQQQQ;
     else
       return false;
 
-    AddDefaultPred(BuildMI(MBB, I, DL, get(Opc), DestReg)
-                   .addReg(SrcReg));
+    AddDefaultPred(BuildMI(MBB, I, DL, get(Opc), DestReg).addReg(SrcReg));
   }
 
   return true;
 }
 
+static const
+MachineInstrBuilder &AddDReg(MachineInstrBuilder &MIB,
+                             unsigned Reg, unsigned SubIdx, unsigned State,
+                             const TargetRegisterInfo *TRI) {
+  if (!SubIdx)
+    return MIB.addReg(Reg, State);
+
+  if (TargetRegisterInfo::isPhysicalRegister(Reg))
+    return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
+  return MIB.addReg(Reg, State, SubIdx);
+}
+
 void ARMBaseInstrInfo::
 storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                     unsigned SrcReg, bool isKill, int FI,
-                    const TargetRegisterClass *RC) const {
+                    const TargetRegisterClass *RC,
+                    const TargetRegisterInfo *TRI) const {
   DebugLoc DL;
   if (I != MBB.end()) DL = I->getDebugLoc();
   MachineFunction &MF = *MBB.getParent();
@@ -738,45 +802,82 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
     AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STR))
                    .addReg(SrcReg, getKillRegState(isKill))
                    .addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO));
+  } else if (RC == ARM::SPRRegisterClass) {
+    AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRS))
+                   .addReg(SrcReg, getKillRegState(isKill))
+                   .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
   } else if (RC == ARM::DPRRegisterClass ||
              RC == ARM::DPR_VFP2RegisterClass ||
              RC == ARM::DPR_8RegisterClass) {
     AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRD))
                    .addReg(SrcReg, getKillRegState(isKill))
                    .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
-  } else if (RC == ARM::SPRRegisterClass) {
-    AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRS))
-                   .addReg(SrcReg, getKillRegState(isKill))
-                   .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
-  } else {
-    assert((RC == ARM::QPRRegisterClass ||
-            RC == ARM::QPR_VFP2RegisterClass) && "Unknown regclass!");
+  } else if (RC == ARM::QPRRegisterClass ||
+             RC == ARM::QPR_VFP2RegisterClass ||
+             RC == ARM::QPR_8RegisterClass) {
     // FIXME: Neon instructions should support predicates
-    if (Align >= 16 && (getRegisterInfo().canRealignStack(MF))) {
+    if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
       AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q))
                      .addFrameIndex(FI).addImm(128)
-                     .addMemOperand(MMO)
-                     .addReg(SrcReg, getKillRegState(isKill)));
+                     .addReg(SrcReg, getKillRegState(isKill))
+                     .addMemOperand(MMO));
     } else {
-      AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMQ)).
-                     addReg(SrcReg, getKillRegState(isKill))
+      AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMQ))
+                     .addReg(SrcReg, getKillRegState(isKill))
                      .addFrameIndex(FI)
                      .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4))
                      .addMemOperand(MMO));
     }
+  } else if (RC == ARM::QQPRRegisterClass || RC == ARM::QQPR_VFP2RegisterClass){
+    if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
+      // FIXME: It's possible to only store part of the QQ register if the
+      // spilled def has a sub-register index.
+      MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VST2q32))
+        .addFrameIndex(FI).addImm(128);
+      MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
+      MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
+      MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
+      MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
+      AddDefaultPred(MIB.addMemOperand(MMO));
+    } else {
+      MachineInstrBuilder MIB =
+        AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMD))
+                       .addFrameIndex(FI)
+                       .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4)))
+        .addMemOperand(MMO);
+      MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
+      MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
+      MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
+            AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
+    }
+  } else {
+    assert(RC == ARM::QQQQPRRegisterClass && "Unknown regclass!");
+    MachineInstrBuilder MIB =
+      AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMD))
+                     .addFrameIndex(FI)
+                     .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4)))
+      .addMemOperand(MMO);
+    MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
+    MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
+    MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
+    MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
+    MIB = AddDReg(MIB, SrcReg, ARM::dsub_4, 0, TRI);
+    MIB = AddDReg(MIB, SrcReg, ARM::dsub_5, 0, TRI);
+    MIB = AddDReg(MIB, SrcReg, ARM::dsub_6, 0, TRI);
+          AddDReg(MIB, SrcReg, ARM::dsub_7, 0, TRI);
   }
 }
 
 void ARMBaseInstrInfo::
 loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                      unsigned DestReg, int FI,
-                     const TargetRegisterClass *RC) const {
+                     const TargetRegisterClass *RC,
+                     const TargetRegisterInfo *TRI) const {
   DebugLoc DL;
   if (I != MBB.end()) DL = I->getDebugLoc();
   MachineFunction &MF = *MBB.getParent();
   MachineFrameInfo &MFI = *MF.getFrameInfo();
   unsigned Align = MFI.getObjectAlignment(FI);
-
   MachineMemOperand *MMO =
     MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FI),
                             MachineMemOperand::MOLoad, 0,
@@ -791,20 +892,18 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
   if (RC == ARM::GPRRegisterClass) {
     AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDR), DestReg)
                    .addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO));
+  } else if (RC == ARM::SPRRegisterClass) {
+    AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg)
+                   .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
   } else if (RC == ARM::DPRRegisterClass ||
              RC == ARM::DPR_VFP2RegisterClass ||
              RC == ARM::DPR_8RegisterClass) {
     AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg)
                    .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
-  } else if (RC == ARM::SPRRegisterClass) {
-    AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg)
-                   .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
-  } else {
-    assert((RC == ARM::QPRRegisterClass ||
-            RC == ARM::QPR_VFP2RegisterClass ||
-            RC == ARM::QPR_8RegisterClass) && "Unknown regclass!");
-    if (Align >= 16
-        && (getRegisterInfo().canRealignStack(MF))) {
+  } else if (RC == ARM::QPRRegisterClass ||
+             RC == ARM::QPR_VFP2RegisterClass ||
+             RC == ARM::QPR_8RegisterClass) {
+    if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
       AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q), DestReg)
                      .addFrameIndex(FI).addImm(128)
                      .addMemOperand(MMO));
@@ -814,6 +913,40 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                      .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4))
                      .addMemOperand(MMO));
     }
+  } else if (RC == ARM::QQPRRegisterClass || RC == ARM::QQPR_VFP2RegisterClass){
+    if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
+      MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLD2q32));
+      MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI);
+      MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI);
+      MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI);
+      MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI);
+      AddDefaultPred(MIB.addFrameIndex(FI).addImm(128).addMemOperand(MMO));
+    } else {
+      MachineInstrBuilder MIB =
+        AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMD))
+                       .addFrameIndex(FI)
+                       .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4)))
+        .addMemOperand(MMO);
+      MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI);
+      MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI);
+      MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI);
+            AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI);
+    }
+  } else {
+    assert(RC == ARM::QQQQPRRegisterClass && "Unknown regclass!");
+      MachineInstrBuilder MIB =
+        AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMD))
+                       .addFrameIndex(FI)
+                       .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4)))
+        .addMemOperand(MMO);
+      MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI);
+      MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI);
+      MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI);
+      MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI);
+      MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::Define, TRI);
+      MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::Define, TRI);
+      MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::Define, TRI);
+            AddDReg(MIB, DestReg, ARM::dsub_7, RegState::Define, TRI);
   }
 }
 
@@ -930,8 +1063,7 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
                 DstSubReg)
         .addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg);
     }
-  }
-  else if (Opc == ARM::VMOVD) {
+  } else if (Opc == ARM::VMOVD || Opc == ARM::VMOVDneon) {
     unsigned Pred = MI->getOperand(2).getImm();
     unsigned PredReg = MI->getOperand(3).getReg();
     if (OpNum == 0) { // move -> store
@@ -957,6 +1089,56 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
                 DstSubReg)
         .addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg);
     }
+  }  else if (Opc == ARM::VMOVQ) {
+    MachineFrameInfo &MFI = *MF.getFrameInfo();
+    unsigned Pred = MI->getOperand(2).getImm();
+    unsigned PredReg = MI->getOperand(3).getReg();
+    if (OpNum == 0) { // move -> store
+      unsigned SrcReg = MI->getOperand(1).getReg();
+      unsigned SrcSubReg = MI->getOperand(1).getSubReg();
+      bool isKill = MI->getOperand(1).isKill();
+      bool isUndef = MI->getOperand(1).isUndef();
+      if (MFI.getObjectAlignment(FI) >= 16 &&
+          getRegisterInfo().canRealignStack(MF)) {
+        NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VST1q))
+          .addFrameIndex(FI).addImm(128)
+          .addReg(SrcReg,
+                  getKillRegState(isKill) | getUndefRegState(isUndef),
+                  SrcSubReg)
+          .addImm(Pred).addReg(PredReg);
+      } else {
+        NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VSTMQ))
+          .addReg(SrcReg,
+                  getKillRegState(isKill) | getUndefRegState(isUndef),
+                  SrcSubReg)
+          .addFrameIndex(FI).addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4))
+          .addImm(Pred).addReg(PredReg);
+      }
+    } else {          // move -> load
+      unsigned DstReg = MI->getOperand(0).getReg();
+      unsigned DstSubReg = MI->getOperand(0).getSubReg();
+      bool isDead = MI->getOperand(0).isDead();
+      bool isUndef = MI->getOperand(0).isUndef();
+      if (MFI.getObjectAlignment(FI) >= 16 &&
+          getRegisterInfo().canRealignStack(MF)) {
+        NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VLD1q))
+          .addReg(DstReg,
+                  RegState::Define |
+                  getDeadRegState(isDead) |
+                  getUndefRegState(isUndef),
+                  DstSubReg)
+          .addFrameIndex(FI).addImm(128).addImm(Pred).addReg(PredReg);
+      } else {
+        NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VLDMQ))
+          .addReg(DstReg,
+                  RegState::Define |
+                  getDeadRegState(isDead) |
+                  getUndefRegState(isUndef),
+                  DstSubReg)
+          .addFrameIndex(FI).addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4))
+          .addImm(Pred).addReg(PredReg);
+      }
+    }
   }
 
   return NewMI;
@@ -985,12 +1167,13 @@ ARMBaseInstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
              Opc == ARM::tMOVtgpr2gpr ||
              Opc == ARM::tMOVgpr2tgpr) {
     return true;
-  } else if (Opc == ARM::VMOVS || Opc == ARM::VMOVD) {
+  } else if (Opc == ARM::VMOVS || Opc == ARM::VMOVD ||
+             Opc == ARM::VMOVDneon || Opc == ARM::VMOVQ) {
     return true;
-  } else if (Opc == ARM::VMOVDneon || Opc == ARM::VMOVQ) {
-    return false; // FIXME
   }
 
+  // FIXME: VMOVQQ and VMOVQQQQ?
+
   return false;
 }
 
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index 7a5630e..b566271 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -200,6 +200,11 @@ public:
   virtual const ARMBaseRegisterInfo &getRegisterInfo() const =0;
   const ARMSubtarget &getSubtarget() const { return Subtarget; }
 
+  bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+                                 MachineBasicBlock::iterator MI,
+                                 const std::vector<CalleeSavedInfo> &CSI,
+                                 const TargetRegisterInfo *TRI) const;
+
   // Branch analysis.
   virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
                              MachineBasicBlock *&FBB,
@@ -257,17 +262,20 @@ public:
                             MachineBasicBlock::iterator I,
                             unsigned DestReg, unsigned SrcReg,
                             const TargetRegisterClass *DestRC,
-                            const TargetRegisterClass *SrcRC) const;
+                            const TargetRegisterClass *SrcRC,
+                            DebugLoc DL) const;
 
   virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MBBI,
                                    unsigned SrcReg, bool isKill, int FrameIndex,
-                                   const TargetRegisterClass *RC) const;
+                                   const TargetRegisterClass *RC,
+                                   const TargetRegisterInfo *TRI) const;
 
   virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MBBI,
                                     unsigned DestReg, int FrameIndex,
-                                    const TargetRegisterClass *RC) const;
+                                    const TargetRegisterClass *RC,
+                                    const TargetRegisterInfo *TRI) const;
 
   virtual MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF,
                                                  int FrameIx,
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index bc12187..82458d2 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -259,10 +259,10 @@ ARMBaseRegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
                                               unsigned SubIdx) const {
   switch (SubIdx) {
   default: return 0;
-  case 1:
-  case 2:
-  case 3:
-  case 4:
+  case ARM::ssub_0:
+  case ARM::ssub_1:
+  case ARM::ssub_2:
+  case ARM::ssub_3: {
     // S sub-registers.
     if (A->getSize() == 8) {
       if (B == &ARM::SPR_8RegClass)
@@ -273,22 +273,201 @@ ARMBaseRegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
       return &ARM::DPR_VFP2RegClass;
     }
 
-    assert(A->getSize() == 16 && "Expecting a Q register class!");
-    if (B == &ARM::SPR_8RegClass)
-      return &ARM::QPR_8RegClass;
-    return &ARM::QPR_VFP2RegClass;
-  case 5:
-  case 6:
-    // D sub-registers.
-    if (B == &ARM::DPR_VFP2RegClass)
+    if (A->getSize() == 16) {
+      if (B == &ARM::SPR_8RegClass)
+        return &ARM::QPR_8RegClass;
       return &ARM::QPR_VFP2RegClass;
-    if (B == &ARM::DPR_8RegClass)
-      return &ARM::QPR_8RegClass;
+    }
+
+    if (A->getSize() == 32) {
+      if (B == &ARM::SPR_8RegClass)
+        return 0;  // Do not allow coalescing!
+      return &ARM::QQPR_VFP2RegClass;
+    }
+
+    assert(A->getSize() == 64 && "Expecting a QQQQ register class!");
+    return 0;  // Do not allow coalescing!
+  }
+  case ARM::dsub_0:
+  case ARM::dsub_1:
+  case ARM::dsub_2:
+  case ARM::dsub_3: {
+    // D sub-registers.
+    if (A->getSize() == 16) {
+      if (B == &ARM::DPR_VFP2RegClass)
+        return &ARM::QPR_VFP2RegClass;
+      if (B == &ARM::DPR_8RegClass)
+        return 0;  // Do not allow coalescing!
+      return A;
+    }
+
+    if (A->getSize() == 32) {
+      if (B == &ARM::DPR_VFP2RegClass)
+        return &ARM::QQPR_VFP2RegClass;
+      if (B == &ARM::DPR_8RegClass)
+        return 0;  // Do not allow coalescing!
+      return A;
+    }
+
+    assert(A->getSize() == 64 && "Expecting a QQQQ register class!");
+    if (B != &ARM::DPRRegClass)
+      return 0;  // Do not allow coalescing!
     return A;
   }
+  case ARM::dsub_4:
+  case ARM::dsub_5:
+  case ARM::dsub_6:
+  case ARM::dsub_7: {
+    // D sub-registers of QQQQ registers.
+    if (A->getSize() == 64 && B == &ARM::DPRRegClass)
+      return A;
+    return 0;  // Do not allow coalescing!
+  }
+
+  case ARM::qsub_0:
+  case ARM::qsub_1: {
+    // Q sub-registers.
+    if (A->getSize() == 32) {
+      if (B == &ARM::QPR_VFP2RegClass)
+        return &ARM::QQPR_VFP2RegClass;
+      if (B == &ARM::QPR_8RegClass)
+        return 0;  // Do not allow coalescing!
+      return A;
+    }
+
+    assert(A->getSize() == 64 && "Expecting a QQQQ register class!");
+    if (B == &ARM::QPRRegClass)
+      return A;
+    return 0;  // Do not allow coalescing!
+  }
+  case ARM::qsub_2:
+  case ARM::qsub_3: {
+    // Q sub-registers of QQQQ registers.
+    if (A->getSize() == 64 && B == &ARM::QPRRegClass)
+      return A;
+    return 0;  // Do not allow coalescing!
+  }
+  }
   return 0;
 }
 
+bool
+ARMBaseRegisterInfo::canCombinedSubRegIndex(const TargetRegisterClass *RC,
+                                          SmallVectorImpl<unsigned> &SubIndices,
+                                          unsigned &NewSubIdx) const {
+
+  unsigned Size = RC->getSize() * 8;
+  if (Size < 6)
+    return 0;
+
+  NewSubIdx = 0;  // Whole register.
+  unsigned NumRegs = SubIndices.size();
+  if (NumRegs == 8) {
+    // 8 D registers -> 1 QQQQ register.
+    return (Size == 512 &&
+            SubIndices[0] == ARM::dsub_0 &&
+            SubIndices[1] == ARM::dsub_1 &&
+            SubIndices[2] == ARM::dsub_2 &&
+            SubIndices[3] == ARM::dsub_3 &&
+            SubIndices[4] == ARM::dsub_4 &&
+            SubIndices[5] == ARM::dsub_5 &&
+            SubIndices[6] == ARM::dsub_6 &&
+            SubIndices[7] == ARM::dsub_7);
+  } else if (NumRegs == 4) {
+    if (SubIndices[0] == ARM::qsub_0) {
+      // 4 Q registers -> 1 QQQQ register.
+      return (Size == 512 &&
+              SubIndices[1] == ARM::qsub_1 &&
+              SubIndices[2] == ARM::qsub_2 &&
+              SubIndices[3] == ARM::qsub_3);
+    } else if (SubIndices[0] == ARM::dsub_0) {
+      // 4 D registers -> 1 QQ register.
+      if (Size >= 256 &&
+          SubIndices[1] == ARM::dsub_1 &&
+          SubIndices[2] == ARM::dsub_2 &&
+          SubIndices[3] == ARM::dsub_3) {
+        if (Size == 512)
+          NewSubIdx = ARM::qqsub_0;
+        return true;
+      }
+    } else if (SubIndices[0] == ARM::dsub_4) {
+      // 4 D registers -> 1 QQ register (2nd).
+      if (Size == 512 &&
+          SubIndices[1] == ARM::dsub_5 &&
+          SubIndices[2] == ARM::dsub_6 &&
+          SubIndices[3] == ARM::dsub_7) {
+        NewSubIdx = ARM::qqsub_1;
+        return true;
+      }
+    } else if (SubIndices[0] == ARM::ssub_0) {
+      // 4 S registers -> 1 Q register.
+      if (Size >= 128 &&
+          SubIndices[1] == ARM::ssub_1 &&
+          SubIndices[2] == ARM::ssub_2 &&
+          SubIndices[3] == ARM::ssub_3) {
+        if (Size >= 256)
+          NewSubIdx = ARM::qsub_0;
+        return true;
+      }
+    }
+  } else if (NumRegs == 2) {
+    if (SubIndices[0] == ARM::qsub_0) {
+      // 2 Q registers -> 1 QQ register.
+      if (Size >= 256 && SubIndices[1] == ARM::qsub_1) {
+        if (Size == 512)
+          NewSubIdx = ARM::qqsub_0;
+        return true;
+      }
+    } else if (SubIndices[0] == ARM::qsub_2) {
+      // 2 Q registers -> 1 QQ register (2nd).
+      if (Size == 512 && SubIndices[1] == ARM::qsub_3) {
+        NewSubIdx = ARM::qqsub_1;
+        return true;
+      }
+    } else if (SubIndices[0] == ARM::dsub_0) {
+      // 2 D registers -> 1 Q register.
+      if (Size >= 128 && SubIndices[1] == ARM::dsub_1) {
+        if (Size >= 256)
+          NewSubIdx = ARM::qsub_0;
+        return true;
+      }
+    } else if (SubIndices[0] == ARM::dsub_2) {
+      // 2 D registers -> 1 Q register (2nd).
+      if (Size >= 256 && SubIndices[1] == ARM::dsub_3) {
+        NewSubIdx = ARM::qsub_1;
+        return true;
+      }
+    } else if (SubIndices[0] == ARM::dsub_4) {
+      // 2 D registers -> 1 Q register (3rd).
+      if (Size == 512 && SubIndices[1] == ARM::dsub_5) {
+        NewSubIdx = ARM::qsub_2;
+        return true;
+      }
+    } else if (SubIndices[0] == ARM::dsub_6) {
+      // 2 D registers -> 1 Q register (3rd).
+      if (Size == 512 && SubIndices[1] == ARM::dsub_7) {
+        NewSubIdx = ARM::qsub_3;
+        return true;
+      }
+    } else if (SubIndices[0] == ARM::ssub_0) {
+      // 2 S registers -> 1 D register.
+      if (SubIndices[1] == ARM::ssub_1) {
+        if (Size >= 128)
+          NewSubIdx = ARM::dsub_0;
+        return true;
+      }
+    } else if (SubIndices[0] == ARM::ssub_2) {
+      // 2 S registers -> 1 D register (2nd).
+      if (Size >= 128 && SubIndices[1] == ARM::ssub_3) {
+        NewSubIdx = ARM::dsub_1;
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
+
 const TargetRegisterClass *
 ARMBaseRegisterInfo::getPointerRegClass(unsigned Kind) const {
   return ARM::GPRRegisterClass;
@@ -481,7 +660,7 @@ ARMBaseRegisterInfo::UpdateRegAllocHint(unsigned Reg, unsigned NewReg,
 ///
 bool ARMBaseRegisterInfo::hasFP(const MachineFunction &MF) const {
   const MachineFrameInfo *MFI = MF.getFrameInfo();
-  return ((DisableFramePointerElim(MF) && MFI->hasCalls())||
+  return ((DisableFramePointerElim(MF) && MFI->adjustsStack())||
           needsStackRealignment(MF) ||
           MFI->hasVarSizedObjects() ||
           MFI->isFrameAddressTaken());
@@ -509,7 +688,7 @@ needsStackRealignment(const MachineFunction &MF) const {
 bool ARMBaseRegisterInfo::
 cannotEliminateFrame(const MachineFunction &MF) const {
   const MachineFrameInfo *MFI = MF.getFrameInfo();
-  if (DisableFramePointerElim(MF) && MFI->hasCalls())
+  if (DisableFramePointerElim(MF) && MFI->adjustsStack())
     return true;
   return MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken()
     || needsStackRealignment(MF);
@@ -545,24 +724,25 @@ ARMBaseRegisterInfo::estimateRSStackSizeLimit(MachineFunction &MF) const {
          I != E; ++I) {
       for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
         if (!I->getOperand(i).isFI()) continue;
-
-        const TargetInstrDesc &Desc = TII.get(I->getOpcode());
-        unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
-        if (AddrMode == ARMII::AddrMode3 ||
-            AddrMode == ARMII::AddrModeT2_i8)
-          return (1 << 8) - 1;
-
-        if (AddrMode == ARMII::AddrMode5 ||
-            AddrMode == ARMII::AddrModeT2_i8s4)
+        switch (I->getDesc().TSFlags & ARMII::AddrModeMask) {
+        case ARMII::AddrMode3:
+        case ARMII::AddrModeT2_i8:
+          Limit = std::min(Limit, (1U << 8) - 1);
+          break;
+        case ARMII::AddrMode5:
+        case ARMII::AddrModeT2_i8s4:
           Limit = std::min(Limit, ((1U << 8) - 1) * 4);
-
-        if (AddrMode == ARMII::AddrModeT2_i12 && hasFP(MF))
-          // When the stack offset is negative, we will end up using
-          // the i8 instructions instead.
-          return (1 << 8) - 1;
-
-        if (AddrMode == ARMII::AddrMode6)
+          break;
+        case ARMII::AddrModeT2_i12:
+          if (hasFP(MF)) Limit = std::min(Limit, (1U << 8) - 1);
+          break;
+        case ARMII::AddrMode6:
+          // Addressing mode 6 (load/store) instructions can't encode an
+          // immediate offset for stack references.
           return 0;
+        default:
+          break;
+        }
         break; // At most one FI per instruction
       }
     }
@@ -750,7 +930,9 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
       while (NumExtras && !UnspilledCS1GPRs.empty()) {
         unsigned Reg = UnspilledCS1GPRs.back();
         UnspilledCS1GPRs.pop_back();
-        if (!isReservedReg(MF, Reg)) {
+        if (!isReservedReg(MF, Reg) &&
+            (!AFI->isThumb1OnlyFunction() || isARMLowRegister(Reg) ||
+             Reg == ARM::LR)) {
           Extras.push_back(Reg);
           NumExtras--;
         }
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h
index 456c392..2c9c82d 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -81,6 +81,15 @@ public:
   getMatchingSuperRegClass(const TargetRegisterClass *A,
                            const TargetRegisterClass *B, unsigned Idx) const;
 
+  /// canCombinedSubRegIndex - Given a register class and a list of sub-register
+  /// indices, return true if it's possible to combine the sub-register indices
+  /// into one that corresponds to a larger sub-register. Return the new sub-
+  /// register index by reference. Note the new index by be zero if the given
+  /// sub-registers combined to form the whole register.
+  virtual bool canCombinedSubRegIndex(const TargetRegisterClass *RC,
+                                      SmallVectorImpl<unsigned> &SubIndices,
+                                      unsigned &NewSubIdx) const;
+
   const TargetRegisterClass *getPointerRegClass(unsigned Kind = 0) const;
 
   std::pair<TargetRegisterClass::iterator,TargetRegisterClass::iterator>
diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp
index f84f85a..f2730fc 100644
--- a/lib/Target/ARM/ARMCodeEmitter.cpp
+++ b/lib/Target/ARM/ARMCodeEmitter.cpp
@@ -88,6 +88,7 @@ namespace {
     void emitWordLE(unsigned Binary);
     void emitDWordLE(uint64_t Binary);
     void emitConstPoolInstruction(const MachineInstr &MI);
+    void emitMOVi32immInstruction(const MachineInstr &MI);
     void emitMOVi2piecesInstruction(const MachineInstr &MI);
     void emitLEApcrelJTInstruction(const MachineInstr &MI);
     void emitPseudoMoveInstruction(const MachineInstr &MI);
@@ -145,6 +146,15 @@ namespace {
       return getMachineOpValue(MI, MI.getOperand(OpIdx));
     }
 
+    /// getMovi32Value - Return binary encoding of operand for movw/movt. If the
+    /// machine operand requires relocation, record the relocation and return zero.
+    unsigned getMovi32Value(const MachineInstr &MI,const MachineOperand &MO,
+                            unsigned Reloc);
+    unsigned getMovi32Value(const MachineInstr &MI, unsigned OpIdx,
+                            unsigned Reloc) {
+      return getMovi32Value(MI, MI.getOperand(OpIdx), Reloc);
+    }
+
     /// getShiftOp - Return the shift opcode (bit[6:5]) of the immediate value.
     ///
     unsigned getShiftOp(unsigned Imm) const ;
@@ -217,6 +227,31 @@ unsigned ARMCodeEmitter::getShiftOp(unsigned Imm) const {
   return 0;
 }
 
+/// getMovi32Value - Return binary encoding of operand for movw/movt. If the
+/// machine operand requires relocation, record the relocation and return zero.
+unsigned ARMCodeEmitter::getMovi32Value(const MachineInstr &MI,
+                                        const MachineOperand &MO,
+                                        unsigned Reloc) {
+  assert(((Reloc == ARM::reloc_arm_movt) || (Reloc == ARM::reloc_arm_movw))
+      && "Relocation to this function should be for movt or movw");
+
+  if (MO.isImm())
+    return static_cast<unsigned>(MO.getImm());
+  else if (MO.isGlobal())
+    emitGlobalAddress(MO.getGlobal(), Reloc, true, false);
+  else if (MO.isSymbol())
+    emitExternalSymbolAddress(MO.getSymbolName(), Reloc);
+  else if (MO.isMBB())
+    emitMachineBasicBlock(MO.getMBB(), Reloc);
+  else {
+#ifndef NDEBUG
+    errs() << MO;
+#endif
+    llvm_unreachable("Unsupported operand type for movw/movt");
+  }
+  return 0;
+}
+
 /// getMachineOpValue - Return binary encoding of operand. If the machine
 /// operand requires relocation, record the relocation and return zero.
 unsigned ARMCodeEmitter::getMachineOpValue(const MachineInstr &MI,
@@ -438,6 +473,42 @@ void ARMCodeEmitter::emitConstPoolInstruction(const MachineInstr &MI) {
   }
 }
 
+void ARMCodeEmitter::emitMOVi32immInstruction(const MachineInstr &MI) {
+  const MachineOperand &MO0 = MI.getOperand(0);
+  const MachineOperand &MO1 = MI.getOperand(1);
+
+  // Emit the 'movw' instruction.
+  unsigned Binary = 0x30 << 20;  // mov: Insts{27-20} = 0b00110000
+
+  unsigned Lo16 = getMovi32Value(MI, MO1, ARM::reloc_arm_movw) & 0xFFFF;
+
+  // Set the conditional execution predicate.
+  Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+  // Encode Rd.
+  Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRdShift;
+
+  // Encode imm16 as imm4:imm12
+  Binary |= Lo16 & 0xFFF; // Insts{11-0} = imm12
+  Binary |= ((Lo16 >> 12) & 0xF) << 16; // Insts{19-16} = imm4
+  emitWordLE(Binary);
+
+  unsigned Hi16 = getMovi32Value(MI, MO1, ARM::reloc_arm_movt) >> 16;
+  // Emit the 'movt' instruction.
+  Binary = 0x34 << 20; // movt: Insts{27-20} = 0b00110100
+
+  // Set the conditional execution predicate.
+  Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+  // Encode Rd.
+  Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRdShift;
+
+  // Encode imm16 as imm4:imm1, same as movw above.
+  Binary |= Hi16 & 0xFFF;
+  Binary |= ((Hi16 >> 12) & 0xF) << 16;
+  emitWordLE(Binary);
+}
+
 void ARMCodeEmitter::emitMOVi2piecesInstruction(const MachineInstr &MI) {
   const MachineOperand &MO0 = MI.getOperand(0);
   const MachineOperand &MO1 = MI.getOperand(1);
@@ -557,7 +628,6 @@ void ARMCodeEmitter::emitPseudoInstruction(const MachineInstr &MI) {
   switch (Opcode) {
   default:
     llvm_unreachable("ARMCodeEmitter::emitPseudoInstruction");
-  // FIXME: Add support for MOVimm32.
   case TargetOpcode::INLINEASM: {
     // We allow inline assembler nodes with empty bodies - they can
     // implicitly define registers, which is ok for JIT.
@@ -604,6 +674,11 @@ void ARMCodeEmitter::emitPseudoInstruction(const MachineInstr &MI) {
     emitMiscLoadStoreInstruction(MI, ARM::PC);
     break;
   }
+
+  case ARM::MOVi32imm:
+    emitMOVi32immInstruction(MI);
+    break;
+
   case ARM::MOVi2pieces:
     // Two instructions to materialize a constant.
     emitMOVi2piecesInstruction(MI);
@@ -706,10 +781,6 @@ void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI,
                                                    unsigned ImplicitRn) {
   const TargetInstrDesc &TID = MI.getDesc();
 
-  if (TID.Opcode == ARM::BFC) {
-    report_fatal_error("ARMv6t2 JIT is not yet supported.");
-  }
-
   // Part of binary is determined by TableGn.
   unsigned Binary = getBinaryCodeForInstr(MI);
 
@@ -729,6 +800,45 @@ void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI,
     Binary |= (ARMRegisterInfo::getRegisterNumbering(ImplicitRd)
                << ARMII::RegRdShift);
 
+  if (TID.Opcode == ARM::MOVi16) {
+      // Get immediate from MI.
+      unsigned Lo16 = getMovi32Value(MI, MI.getOperand(OpIdx),
+                      ARM::reloc_arm_movw);
+      // Encode imm which is the same as in emitMOVi32immInstruction().
+      Binary |= Lo16 & 0xFFF;
+      Binary |= ((Lo16 >> 12) & 0xF) << 16;
+      emitWordLE(Binary);
+      return;
+  } else if(TID.Opcode == ARM::MOVTi16) {
+      unsigned Hi16 = (getMovi32Value(MI, MI.getOperand(OpIdx),
+                       ARM::reloc_arm_movt) >> 16);
+      Binary |= Hi16 & 0xFFF;
+      Binary |= ((Hi16 >> 12) & 0xF) << 16;
+      emitWordLE(Binary);
+      return;
+  } else if ((TID.Opcode == ARM::BFC) || (TID.Opcode == ARM::BFI)) {
+      uint32_t v = ~MI.getOperand(2).getImm();
+      int32_t lsb = CountTrailingZeros_32(v);
+      int32_t msb = (32 - CountLeadingZeros_32(v)) - 1;
+      // Instr{20-16} = msb, Instr{11-7} = lsb
+      Binary |= (msb & 0x1F) << 16;
+      Binary |= (lsb & 0x1F) << 7;
+      emitWordLE(Binary);
+      return;
+  } else if ((TID.Opcode == ARM::UBFX) || (TID.Opcode == ARM::SBFX)) {
+      // Encode Rn in Instr{0-3}
+      Binary |= getMachineOpValue(MI, OpIdx++);
+
+      uint32_t lsb = MI.getOperand(OpIdx++).getImm();
+      uint32_t widthm1 = MI.getOperand(OpIdx++).getImm() - 1;
+
+      // Instr{20-16} = widthm1, Instr{11-7} = lsb
+      Binary |= (widthm1 & 0x1F) << 16;
+      Binary |= (lsb & 0x1F) << 7;
+      emitWordLE(Binary);
+      return;
+  }
+
   // If this is a two-address operand, skip it. e.g. MOVCCr operand 1.
   if (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1)
     ++OpIdx;
@@ -1366,18 +1476,66 @@ ARMCodeEmitter::emitVFPLoadStoreMultipleInstruction(const MachineInstr &MI) {
       break;
     ++NumRegs;
   }
-  Binary |= NumRegs * 2;
+  // Bit 8 will be set if <list> is consecutive 64-bit registers (e.g., D0)
+  // Otherwise, it will be 0, in the case of 32-bit registers.
+  if(Binary & 0x100)
+    Binary |= NumRegs * 2;
+  else
+    Binary |= NumRegs;
 
   emitWordLE(Binary);
 }
 
 void ARMCodeEmitter::emitMiscInstruction(const MachineInstr &MI) {
+  unsigned Opcode = MI.getDesc().Opcode;
   // Part of binary is determined by TableGn.
   unsigned Binary = getBinaryCodeForInstr(MI);
 
   // Set the conditional execution predicate
   Binary |= II->getPredicate(&MI) << ARMII::CondShift;
 
+  switch(Opcode) {
+  default:
+    llvm_unreachable("ARMCodeEmitter::emitMiscInstruction");
+
+  case ARM::FMSTAT:
+    // No further encoding needed.
+    break;
+
+  case ARM::VMRS:
+  case ARM::VMSR: {
+    const MachineOperand &MO0 = MI.getOperand(0);
+    // Encode Rt.
+    Binary |= ARMRegisterInfo::getRegisterNumbering(MO0.getReg())
+                << ARMII::RegRdShift;
+    break;
+  }
+
+  case ARM::FCONSTD:
+  case ARM::FCONSTS: {
+    // Encode Dd / Sd.
+    Binary |= encodeVFPRd(MI, 0);
+
+    // Encode imm., Table A7-18 VFP modified immediate constants
+    const MachineOperand &MO1 = MI.getOperand(1);
+    unsigned Imm = static_cast<unsigned>(MO1.getFPImm()->getValueAPF()
+                      .bitcastToAPInt().getHiBits(32).getLimitedValue());
+    unsigned ModifiedImm;
+
+    if(Opcode == ARM::FCONSTS)
+      ModifiedImm = (Imm & 0x80000000) >> 24 | // a
+                    (Imm & 0x03F80000) >> 19;  // bcdefgh
+    else // Opcode == ARM::FCONSTD
+      ModifiedImm = (Imm & 0x80000000) >> 24 | // a
+                    (Imm & 0x007F0000) >> 16;  // bcdefgh
+
+    // Insts{19-16} = abcd, Insts{3-0} = efgh
+    Binary |= ((ModifiedImm & 0xF0) >> 4) << 16;
+    Binary |= (ModifiedImm & 0xF);
+    break;
+  }
+  }
+
   emitWordLE(Binary);
 }
 
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 845d088..c87f5d7 100644
--- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -29,6 +29,7 @@ namespace {
     ARMExpandPseudo() : MachineFunctionPass(&ID) {}
 
     const TargetInstrInfo *TII;
+    const TargetRegisterInfo *TRI;
 
     virtual bool runOnMachineFunction(MachineFunction &Fn);
 
@@ -37,11 +38,31 @@ namespace {
     }
 
   private:
+    void TransferImpOps(MachineInstr &OldMI,
+                        MachineInstrBuilder &UseMI, MachineInstrBuilder &DefMI);
     bool ExpandMBB(MachineBasicBlock &MBB);
   };
   char ARMExpandPseudo::ID = 0;
 }
 
+/// TransferImpOps - Transfer implicit operands on the pseudo instruction to
+/// the instructions created from the expansion.
+void ARMExpandPseudo::TransferImpOps(MachineInstr &OldMI,
+                                     MachineInstrBuilder &UseMI,
+                                     MachineInstrBuilder &DefMI) {
+  const TargetInstrDesc &Desc = OldMI.getDesc();
+  for (unsigned i = Desc.getNumOperands(), e = OldMI.getNumOperands();
+       i != e; ++i) {
+    const MachineOperand &MO = OldMI.getOperand(i);
+    assert(MO.isReg() && MO.getReg());
+    if (MO.isUse())
+      UseMI.addReg(MO.getReg(), getKillRegState(MO.isKill()));
+    else
+      DefMI.addReg(MO.getReg(),
+                   getDefRegState(true) | getDeadRegState(MO.isDead()));
+  }
+}
+
 bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
   bool Modified = false;
 
@@ -58,52 +79,82 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
       unsigned NewLdOpc = (Opcode == ARM::tLDRpci_pic)
         ? ARM::tLDRpci : ARM::t2LDRpci;
       unsigned DstReg = MI.getOperand(0).getReg();
-      if (!MI.getOperand(0).isDead()) {
-        MachineInstr *NewMI =
-          AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
-                                 TII->get(NewLdOpc), DstReg)
-                         .addOperand(MI.getOperand(1)));
-        NewMI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
-        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tPICADD))
-          .addReg(DstReg, getDefRegState(true))
-          .addReg(DstReg)
-          .addOperand(MI.getOperand(2));
-      }
+      bool DstIsDead = MI.getOperand(0).isDead();
+      MachineInstrBuilder MIB1 =
+        AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
+                               TII->get(NewLdOpc), DstReg)
+                       .addOperand(MI.getOperand(1)));
+      (*MIB1).setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+      MachineInstrBuilder MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+                                         TII->get(ARM::tPICADD))
+        .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstIsDead))
+        .addReg(DstReg)
+        .addOperand(MI.getOperand(2));
+      TransferImpOps(MI, MIB1, MIB2);
       MI.eraseFromParent();
       Modified = true;
       break;
     }
+
     case ARM::t2MOVi32imm: {
+      unsigned PredReg = 0;
+      ARMCC::CondCodes Pred = llvm::getInstrPredicate(&MI, PredReg);
       unsigned DstReg = MI.getOperand(0).getReg();
-      if (!MI.getOperand(0).isDead()) {
-        const MachineOperand &MO = MI.getOperand(1);
-        MachineInstrBuilder LO16, HI16;
-
-        LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::t2MOVi16),
-                       DstReg);
-        HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::t2MOVTi16))
-          .addReg(DstReg, getDefRegState(true)).addReg(DstReg);
-
-        if (MO.isImm()) {
-          unsigned Imm = MO.getImm();
-          unsigned Lo16 = Imm & 0xffff;
-          unsigned Hi16 = (Imm >> 16) & 0xffff;
-          LO16 = LO16.addImm(Lo16);
-          HI16 = HI16.addImm(Hi16);
-        } else {
-          const GlobalValue *GV = MO.getGlobal();
-          unsigned TF = MO.getTargetFlags();
-          LO16 = LO16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_LO16);
-          HI16 = HI16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_HI16);
-          // FIXME: What's about memoperands?
-        }
-        AddDefaultPred(LO16);
-        AddDefaultPred(HI16);
+      bool DstIsDead = MI.getOperand(0).isDead();
+      const MachineOperand &MO = MI.getOperand(1);
+      MachineInstrBuilder LO16, HI16;
+
+      LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::t2MOVi16),
+                     DstReg);
+      HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::t2MOVTi16))
+        .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstIsDead))
+        .addReg(DstReg);
+
+      if (MO.isImm()) {
+        unsigned Imm = MO.getImm();
+        unsigned Lo16 = Imm & 0xffff;
+        unsigned Hi16 = (Imm >> 16) & 0xffff;
+        LO16 = LO16.addImm(Lo16);
+        HI16 = HI16.addImm(Hi16);
+      } else {
+        const GlobalValue *GV = MO.getGlobal();
+        unsigned TF = MO.getTargetFlags();
+        LO16 = LO16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_LO16);
+        HI16 = HI16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_HI16);
       }
+      (*LO16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+      (*HI16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+      LO16.addImm(Pred).addReg(PredReg);
+      HI16.addImm(Pred).addReg(PredReg);
+      TransferImpOps(MI, LO16, HI16);
+      MI.eraseFromParent();
+      Modified = true;
+      break;
+    }
+
+    case ARM::VMOVQQ: {
+      unsigned DstReg = MI.getOperand(0).getReg();
+      bool DstIsDead = MI.getOperand(0).isDead();
+      unsigned EvenDst = TRI->getSubReg(DstReg, ARM::qsub_0);
+      unsigned OddDst  = TRI->getSubReg(DstReg, ARM::qsub_1);
+      unsigned SrcReg = MI.getOperand(1).getReg();
+      bool SrcIsKill = MI.getOperand(1).isKill();
+      unsigned EvenSrc = TRI->getSubReg(SrcReg, ARM::qsub_0);
+      unsigned OddSrc  = TRI->getSubReg(SrcReg, ARM::qsub_1);
+      MachineInstrBuilder Even =
+        AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
+                               TII->get(ARM::VMOVQ))
+                       .addReg(EvenDst, getDefRegState(true) | getDeadRegState(DstIsDead))
+                       .addReg(EvenSrc, getKillRegState(SrcIsKill)));
+      MachineInstrBuilder Odd =
+        AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
+                               TII->get(ARM::VMOVQ))
+                       .addReg(OddDst, getDefRegState(true) | getDeadRegState(DstIsDead))
+                       .addReg(OddSrc, getKillRegState(SrcIsKill)));
+      TransferImpOps(MI, Even, Odd);
       MI.eraseFromParent();
       Modified = true;
     }
-    // FIXME: expand t2MOVi32imm
     }
     MBBI = NMBBI;
   }
@@ -113,6 +164,7 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
 
 bool ARMExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
   TII = MF.getTarget().getInstrInfo();
+  TRI = MF.getTarget().getRegisterInfo();
 
   bool Modified = false;
   for (MachineFunction::iterator MFI = MF.begin(), E = MF.end(); MFI != E;
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 616942c..9baef6b 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -37,7 +37,8 @@ using namespace llvm;
 
 static cl::opt<bool>
 UseRegSeq("neon-reg-sequence", cl::Hidden,
-          cl::desc("Use reg_sequence to model ld / st of multiple neon regs"));
+          cl::desc("Use reg_sequence to model ld / st of multiple neon regs"),
+          cl::init(true));
 
 //===--------------------------------------------------------------------===//
 /// ARMDAGToDAGISel - ARM specific code to select ARM machine
@@ -164,15 +165,34 @@ private:
                                ARMCC::CondCodes CCVal, SDValue CCR,
                                SDValue InFlag);
 
+  SDNode *SelectConcatVector(SDNode *N);
+
   /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
   /// inline asm expressions.
   virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
                                             char ConstraintCode,
                                             std::vector<SDValue> &OutOps);
 
-  /// PairDRegs - Insert a pair of double registers into an implicit def to
-  /// form a quad register.
+  /// PairDRegs - Form a quad register from a pair of D registers.
+  ///
   SDNode *PairDRegs(EVT VT, SDValue V0, SDValue V1);
+
+  /// PairDRegs - Form a quad register pair from a pair of Q registers.
+  ///
+  SDNode *PairQRegs(EVT VT, SDValue V0, SDValue V1);
+
+  /// QuadDRegs - Form a quad register pair from a quad of D registers.
+  ///
+  SDNode *QuadDRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
+
+  /// QuadQRegs - Form 4 consecutive Q registers.
+  ///
+  SDNode *QuadQRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
+
+  /// OctoDRegs - Form 8 consecutive D registers.
+  ///
+  SDNode *OctoDRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3,
+                    SDValue V4, SDValue V5, SDValue V6, SDValue V7);
 };
 }
 
@@ -940,13 +960,13 @@ SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) {
   return NULL;
 }
 
-/// PairDRegs - Insert a pair of double registers into an implicit def to
-/// form a quad register.
+/// PairDRegs - Form a quad register from a pair of D registers.
+///
 SDNode *ARMDAGToDAGISel::PairDRegs(EVT VT, SDValue V0, SDValue V1) {
   DebugLoc dl = V0.getNode()->getDebugLoc();
-  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::DSUBREG_0, MVT::i32);
-  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::DSUBREG_1, MVT::i32);
-  if (UseRegSeq) {
+  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32);
+  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32);
+  if (llvm::ModelWithRegSequence()) {
     const SDValue Ops[] = { V0, SubReg0, V1, SubReg1 };
     return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 4);
   }
@@ -958,6 +978,62 @@ SDNode *ARMDAGToDAGISel::PairDRegs(EVT VT, SDValue V0, SDValue V1) {
                                 VT, SDValue(Pair, 0), V1, SubReg1);
 }
 
+/// PairQRegs - Form 4 consecutive D registers from a pair of Q registers.
+///
+SDNode *ARMDAGToDAGISel::PairQRegs(EVT VT, SDValue V0, SDValue V1) {
+  DebugLoc dl = V0.getNode()->getDebugLoc();
+  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, MVT::i32);
+  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, MVT::i32);
+  const SDValue Ops[] = { V0, SubReg0, V1, SubReg1 };
+  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 4);
+}
+
+/// QuadDRegs - Form 4 consecutive D registers.
+///
+SDNode *ARMDAGToDAGISel::QuadDRegs(EVT VT, SDValue V0, SDValue V1,
+                                   SDValue V2, SDValue V3) {
+  DebugLoc dl = V0.getNode()->getDebugLoc();
+  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32);
+  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32);
+  SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, MVT::i32);
+  SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, MVT::i32);
+  const SDValue Ops[] = { V0, SubReg0, V1, SubReg1, V2, SubReg2, V3, SubReg3 };
+  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 8);
+}
+
+/// QuadQRegs - Form 4 consecutive Q registers.
+///
+SDNode *ARMDAGToDAGISel::QuadQRegs(EVT VT, SDValue V0, SDValue V1,
+                                   SDValue V2, SDValue V3) {
+  DebugLoc dl = V0.getNode()->getDebugLoc();
+  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, MVT::i32);
+  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, MVT::i32);
+  SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, MVT::i32);
+  SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, MVT::i32);
+  const SDValue Ops[] = { V0, SubReg0, V1, SubReg1, V2, SubReg2, V3, SubReg3 };
+  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 8);
+}
+
+/// OctoDRegs - Form 8 consecutive D registers.
+///
+SDNode *ARMDAGToDAGISel::OctoDRegs(EVT VT, SDValue V0, SDValue V1,
+                                   SDValue V2, SDValue V3,
+                                   SDValue V4, SDValue V5,
+                                   SDValue V6, SDValue V7) {
+  DebugLoc dl = V0.getNode()->getDebugLoc();
+  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32);
+  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32);
+  SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, MVT::i32);
+  SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, MVT::i32);
+  SDValue SubReg4 = CurDAG->getTargetConstant(ARM::dsub_4, MVT::i32);
+  SDValue SubReg5 = CurDAG->getTargetConstant(ARM::dsub_5, MVT::i32);
+  SDValue SubReg6 = CurDAG->getTargetConstant(ARM::dsub_6, MVT::i32);
+  SDValue SubReg7 = CurDAG->getTargetConstant(ARM::dsub_7, MVT::i32);
+  const SDValue Ops[] ={ V0, SubReg0, V1, SubReg1, V2, SubReg2, V3, SubReg3,
+                         V4, SubReg4, V5, SubReg5, V6, SubReg6, V7, SubReg7 };
+  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 16);
+}
+
 /// GetNEONSubregVT - Given a type for a 128-bit NEON vector, return the type
 /// for a 64-bit subregister of the vector.
 static EVT GetNEONSubregVT(EVT VT) {
@@ -1011,7 +1087,34 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
     const SDValue Ops[] = { MemAddr, Align, Pred, Reg0, Chain };
     std::vector<EVT> ResTys(NumVecs, VT);
     ResTys.push_back(MVT::Other);
-    return CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5);
+    SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5);
+    if (!llvm::ModelWithRegSequence() || NumVecs < 2)
+      return VLd;
+
+    SDValue RegSeq;
+    SDValue V0 = SDValue(VLd, 0);
+    SDValue V1 = SDValue(VLd, 1);
+
+    // Form a REG_SEQUENCE to force register allocation.
+    if (NumVecs == 2)
+      RegSeq = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0);
+    else {
+      SDValue V2 = SDValue(VLd, 2);
+      // If it's a vld3, form a quad D-register but discard the last part.
+      SDValue V3 = (NumVecs == 3)
+          ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
+          : SDValue(VLd, 3);
+      RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
+    }
+
+    assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
+    for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
+      SDValue D = CurDAG->getTargetExtractSubreg(ARM::dsub_0+Vec,
+                                                 dl, VT, RegSeq);
+      ReplaceUses(SDValue(N, Vec), D);
+    }
+    ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, NumVecs));
+    return NULL;
   }
 
   EVT RegVT = GetNEONSubregVT(VT);
@@ -1026,9 +1129,24 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
     Chain = SDValue(VLd, 2 * NumVecs);
 
     // Combine the even and odd subregs to produce the result.
-    for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
-      SDNode *Q = PairDRegs(VT, SDValue(VLd, 2*Vec), SDValue(VLd, 2*Vec+1));
-      ReplaceUses(SDValue(N, Vec), SDValue(Q, 0));
+    if (llvm::ModelWithRegSequence()) {
+      if (NumVecs == 1) {
+        SDNode *Q = PairDRegs(VT, SDValue(VLd, 0), SDValue(VLd, 1));
+        ReplaceUses(SDValue(N, 0), SDValue(Q, 0));
+      } else {
+        SDValue QQ = SDValue(QuadDRegs(MVT::v4i64,
+                                       SDValue(VLd, 0), SDValue(VLd, 1),
+                                       SDValue(VLd, 2), SDValue(VLd, 3)), 0);
+        SDValue Q0 = CurDAG->getTargetExtractSubreg(ARM::qsub_0, dl, VT, QQ);
+        SDValue Q1 = CurDAG->getTargetExtractSubreg(ARM::qsub_1, dl, VT, QQ);
+        ReplaceUses(SDValue(N, 0), Q0);
+        ReplaceUses(SDValue(N, 1), Q1);
+      }
+    } else {
+      for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
+        SDNode *Q = PairDRegs(VT, SDValue(VLd, 2*Vec), SDValue(VLd, 2*Vec+1));
+        ReplaceUses(SDValue(N, Vec), SDValue(Q, 0));
+      }
     }
   } else {
     // Otherwise, quad registers are loaded with two separate instructions,
@@ -1051,10 +1169,37 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
     SDNode *VLdB = CurDAG->getMachineNode(Opc, dl, ResTys, OpsB, 6);
     Chain = SDValue(VLdB, NumVecs+1);
 
-    // Combine the even and odd subregs to produce the result.
-    for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
-      SDNode *Q = PairDRegs(VT, SDValue(VLdA, Vec), SDValue(VLdB, Vec));
-      ReplaceUses(SDValue(N, Vec), SDValue(Q, 0));
+    if (llvm::ModelWithRegSequence()) {
+      SDValue V0 = SDValue(VLdA, 0);
+      SDValue V1 = SDValue(VLdB, 0);
+      SDValue V2 = SDValue(VLdA, 1);
+      SDValue V3 = SDValue(VLdB, 1);
+      SDValue V4 = SDValue(VLdA, 2);
+      SDValue V5 = SDValue(VLdB, 2);
+      SDValue V6 = (NumVecs == 3)
+          ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,RegVT),
+                    0)
+          : SDValue(VLdA, 3);
+      SDValue V7 = (NumVecs == 3)
+          ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,RegVT),
+                    0)
+          : SDValue(VLdB, 3);
+      SDValue RegSeq = SDValue(OctoDRegs(MVT::v8i64, V0, V1, V2, V3,
+                                         V4, V5, V6, V7), 0);
+
+      // Extract out the 3 / 4 Q registers.
+      assert(ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
+      for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
+        SDValue Q = CurDAG->getTargetExtractSubreg(ARM::qsub_0+Vec,
+                                                   dl, VT, RegSeq);
+        ReplaceUses(SDValue(N, Vec), Q);
+      }
+    } else {
+      // Combine the even and odd subregs to produce the result.
+      for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
+        SDNode *Q = PairDRegs(VT, SDValue(VLdA, Vec), SDValue(VLdB, Vec));
+        ReplaceUses(SDValue(N, Vec), SDValue(Q, 0));
+      }
     }
   }
   ReplaceUses(SDValue(N, NumVecs), Chain);
@@ -1102,12 +1247,43 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
   Ops.push_back(Align);
 
   if (is64BitVector) {
-    unsigned Opc = DOpcodes[OpcodeIndex];
-    for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
-      Ops.push_back(N->getOperand(Vec+3));
+    if (llvm::ModelWithRegSequence() && NumVecs >= 2) {
+      SDValue RegSeq;
+      SDValue V0 = N->getOperand(0+3);
+      SDValue V1 = N->getOperand(1+3);
+
+      // Form a REG_SEQUENCE to force register allocation.
+      if (NumVecs == 2)
+        RegSeq = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0);
+      else {
+        SDValue V2 = N->getOperand(2+3);
+        // If it's a vld3, form a quad D-register and leave the last part as 
+        // an undef.
+        SDValue V3 = (NumVecs == 3)
+          ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
+          : N->getOperand(3+3);
+        RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
+      }
+
+      // Now extract the D registers back out.
+      Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, VT,
+                                                   RegSeq));
+      Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, VT,
+                                                   RegSeq));
+      if (NumVecs > 2)
+        Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_2, dl, VT,
+                                                     RegSeq));
+      if (NumVecs > 3)
+        Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_3, dl, VT,
+                                                     RegSeq));
+    } else {
+      for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
+        Ops.push_back(N->getOperand(Vec+3));
+    }
     Ops.push_back(Pred);
     Ops.push_back(Reg0); // predicate register
     Ops.push_back(Chain);
+    unsigned Opc = DOpcodes[OpcodeIndex];
     return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+5);
   }
 
@@ -1116,48 +1292,114 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
     // Quad registers are directly supported for VST1 and VST2,
     // storing pairs of D regs.
     unsigned Opc = QOpcodes0[OpcodeIndex];
-    for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
-      Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT,
-                                                   N->getOperand(Vec+3)));
-      Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT,
-                                                   N->getOperand(Vec+3)));
+    if (llvm::ModelWithRegSequence() && NumVecs == 2) {
+      // First extract the pair of Q registers.
+      SDValue Q0 = N->getOperand(3);
+      SDValue Q1 = N->getOperand(4);
+
+      // Form a QQ register.
+      SDValue QQ = SDValue(PairQRegs(MVT::v4i64, Q0, Q1), 0);
+
+      // Now extract the D registers back out.
+      Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, RegVT,
+                                                   QQ));
+      Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, RegVT,
+                                                   QQ));
+      Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_2, dl, RegVT,
+                                                   QQ));
+      Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_3, dl, RegVT,
+                                                   QQ));
+      Ops.push_back(Pred);
+      Ops.push_back(Reg0); // predicate register
+      Ops.push_back(Chain);
+      return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 5 + 4);
+    } else {
+      for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
+        Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, RegVT,
+                                                     N->getOperand(Vec+3)));
+        Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, RegVT,
+                                                     N->getOperand(Vec+3)));
+      }
+      Ops.push_back(Pred);
+      Ops.push_back(Reg0); // predicate register
+      Ops.push_back(Chain);
+      return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(),
+                                    5 + 2 * NumVecs);
     }
-    Ops.push_back(Pred);
-    Ops.push_back(Reg0); // predicate register
-    Ops.push_back(Chain);
-    return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(),
-                                  5 + 2 * NumVecs);
   }
 
   // Otherwise, quad registers are stored with two separate instructions,
   // where one stores the even registers and the other stores the odd registers.
+  if (llvm::ModelWithRegSequence()) {
+    // Form the QQQQ REG_SEQUENCE.
+    SDValue V[8];
+    for (unsigned Vec = 0, i = 0; Vec < NumVecs; ++Vec, i+=2) {
+      V[i]   = CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, RegVT,
+                                              N->getOperand(Vec+3));
+      V[i+1] = CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, RegVT,
+                                              N->getOperand(Vec+3));
+    }
+    if (NumVecs == 3)
+      V[6] = V[7] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
+                                                   dl, RegVT), 0);
 
-  Ops.push_back(Reg0); // post-access address offset
+    SDValue RegSeq = SDValue(OctoDRegs(MVT::v8i64, V[0], V[1], V[2], V[3],
+                                       V[4], V[5], V[6], V[7]), 0);
 
-  // Store the even subregs.
-  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
-    Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT,
-                                                 N->getOperand(Vec+3)));
-  Ops.push_back(Pred);
-  Ops.push_back(Reg0); // predicate register
-  Ops.push_back(Chain);
-  unsigned Opc = QOpcodes0[OpcodeIndex];
-  SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
-                                        MVT::Other, Ops.data(), NumVecs+6);
-  Chain = SDValue(VStA, 1);
-
-  // Store the odd subregs.
-  Ops[0] = SDValue(VStA, 0); // MemAddr
-  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
-    Ops[Vec+3] = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT,
-                                                N->getOperand(Vec+3));
-  Ops[NumVecs+5] = Chain;
-  Opc = QOpcodes1[OpcodeIndex];
-  SDNode *VStB = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
-                                        MVT::Other, Ops.data(), NumVecs+6);
-  Chain = SDValue(VStB, 1);
-  ReplaceUses(SDValue(N, 0), Chain);
-  return NULL;
+    // Store the even D registers.
+    assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
+    Ops.push_back(Reg0); // post-access address offset
+    for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
+      Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0+Vec*2, dl,
+                                                   RegVT, RegSeq));
+    Ops.push_back(Pred);
+    Ops.push_back(Reg0); // predicate register
+    Ops.push_back(Chain);
+    unsigned Opc = QOpcodes0[OpcodeIndex];
+    SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
+                                          MVT::Other, Ops.data(), NumVecs+6);
+    Chain = SDValue(VStA, 1);
+
+    // Store the odd D registers.
+    Ops[0] = SDValue(VStA, 0); // MemAddr
+    for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
+      Ops[Vec+3] = CurDAG->getTargetExtractSubreg(ARM::dsub_1+Vec*2, dl,
+                                                  RegVT, RegSeq);
+    Ops[NumVecs+5] = Chain;
+    Opc = QOpcodes1[OpcodeIndex];
+    SDNode *VStB = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
+                                          MVT::Other, Ops.data(), NumVecs+6);
+    Chain = SDValue(VStB, 1);
+    ReplaceUses(SDValue(N, 0), Chain);
+    return NULL;
+  } else {
+    Ops.push_back(Reg0); // post-access address offset
+
+    // Store the even subregs.
+    for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
+      Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, RegVT,
+                                                   N->getOperand(Vec+3)));
+    Ops.push_back(Pred);
+    Ops.push_back(Reg0); // predicate register
+    Ops.push_back(Chain);
+    unsigned Opc = QOpcodes0[OpcodeIndex];
+    SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
+                                          MVT::Other, Ops.data(), NumVecs+6);
+    Chain = SDValue(VStA, 1);
+
+    // Store the odd subregs.
+    Ops[0] = SDValue(VStA, 0); // MemAddr
+    for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
+      Ops[Vec+3] = CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, RegVT,
+                                                  N->getOperand(Vec+3));
+    Ops[NumVecs+5] = Chain;
+    Opc = QOpcodes1[OpcodeIndex];
+    SDNode *VStB = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
+                                          MVT::Other, Ops.data(), NumVecs+6);
+    Chain = SDValue(VStB, 1);
+    ReplaceUses(SDValue(N, 0), Chain);
+    return NULL;
+  }
 }
 
 SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
@@ -1180,11 +1422,13 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
   // Quad registers are handled by load/store of subregs. Find the subreg info.
   unsigned NumElts = 0;
   int SubregIdx = 0;
+  bool Even = false;
   EVT RegVT = VT;
   if (!is64BitVector) {
     RegVT = GetNEONSubregVT(VT);
     NumElts = RegVT.getVectorNumElements();
-    SubregIdx = (Lane < NumElts) ? ARM::DSUBREG_0 : ARM::DSUBREG_1;
+    SubregIdx = (Lane < NumElts) ? ARM::dsub_0 : ARM::dsub_1;
+    Even = Lane < NumElts;
   }
 
   unsigned OpcodeIndex;
@@ -1211,8 +1455,35 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
   unsigned Opc = 0;
   if (is64BitVector) {
     Opc = DOpcodes[OpcodeIndex];
-    for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
-      Ops.push_back(N->getOperand(Vec+3));
+    if (llvm::ModelWithRegSequence()) {
+      SDValue RegSeq;
+      SDValue V0 = N->getOperand(0+3);
+      SDValue V1 = N->getOperand(1+3);
+      if (NumVecs == 2) {
+        RegSeq = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0);
+      } else {
+        SDValue V2 = N->getOperand(2+3);
+        SDValue V3 = (NumVecs == 3)
+          ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
+          : N->getOperand(3+3);
+        RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
+      }
+
+      // Now extract the D registers back out.
+      Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, VT,
+                                                   RegSeq));
+      Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, VT,
+                                                   RegSeq));
+      if (NumVecs > 2)
+        Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_2, dl, VT,
+                                                     RegSeq));
+      if (NumVecs > 3)
+        Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_3, dl, VT,
+                                                     RegSeq));
+    } else {
+      for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
+        Ops.push_back(N->getOperand(Vec+3));
+    }
   } else {
     // Check if this is loading the even or odd subreg of a Q register.
     if (Lane < NumElts) {
@@ -1221,10 +1492,32 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
       Lane -= NumElts;
       Opc = QOpcodes1[OpcodeIndex];
     }
-    // Extract the subregs of the input vector.
-    for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
-      Ops.push_back(CurDAG->getTargetExtractSubreg(SubregIdx, dl, RegVT,
-                                                   N->getOperand(Vec+3)));
+
+    if (llvm::ModelWithRegSequence()) {
+      SDValue RegSeq;
+      SDValue V0 = N->getOperand(0+3);
+      SDValue V1 = N->getOperand(1+3);
+      if (NumVecs == 2) {
+        RegSeq = SDValue(PairQRegs(MVT::v4i64, V0, V1), 0);
+      } else {
+        SDValue V2 = N->getOperand(2+3);
+        SDValue V3 = (NumVecs == 3)
+          ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
+          : N->getOperand(3+3);
+        RegSeq = SDValue(QuadQRegs(MVT::v8i64, V0, V1, V2, V3), 0);
+      }
+
+      // Extract the subregs of the input vector.
+      unsigned SubIdx = Even ? ARM::dsub_0 : ARM::dsub_1;
+      for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
+        Ops.push_back(CurDAG->getTargetExtractSubreg(SubIdx+Vec*2, dl, RegVT,
+                                                     RegSeq));
+    } else {
+      // Extract the subregs of the input vector.
+      for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
+        Ops.push_back(CurDAG->getTargetExtractSubreg(SubregIdx, dl, RegVT,
+                                                     N->getOperand(Vec+3)));
+    }
   }
   Ops.push_back(getI32Imm(Lane));
   Ops.push_back(Pred);
@@ -1236,8 +1529,60 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
 
   std::vector<EVT> ResTys(NumVecs, RegVT);
   ResTys.push_back(MVT::Other);
-  SDNode *VLdLn =
-    CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), NumVecs+6);
+  SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(),NumVecs+6);
+
+  if (llvm::ModelWithRegSequence()) {
+    // Form a REG_SEQUENCE to force register allocation.
+    SDValue RegSeq;
+    if (is64BitVector) {
+      SDValue V0 = SDValue(VLdLn, 0);
+      SDValue V1 = SDValue(VLdLn, 1);
+      if (NumVecs == 2) {
+        RegSeq = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0);
+      } else {
+        SDValue V2 = SDValue(VLdLn, 2);
+        // If it's a vld3, form a quad D-register but discard the last part.
+        SDValue V3 = (NumVecs == 3)
+          ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
+          : SDValue(VLdLn, 3);
+        RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
+      }
+    } else {
+      // For 128-bit vectors, take the 64-bit results of the load and insert them
+      // as subregs into the result.
+      SDValue V[8];
+      for (unsigned Vec = 0, i = 0; Vec < NumVecs; ++Vec, i+=2) {
+        if (Even) {
+          V[i]   = SDValue(VLdLn, Vec);
+          V[i+1] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
+                                                  dl, RegVT), 0);
+        } else {
+          V[i]   = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
+                                                  dl, RegVT), 0);
+          V[i+1] = SDValue(VLdLn, Vec);
+        }
+      }
+      if (NumVecs == 3)
+        V[6] = V[7] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
+                                                     dl, RegVT), 0);
+
+      if (NumVecs == 2)
+        RegSeq = SDValue(QuadDRegs(MVT::v4i64, V[0], V[1], V[2], V[3]), 0);
+      else
+        RegSeq = SDValue(OctoDRegs(MVT::v8i64, V[0], V[1], V[2], V[3],
+                                   V[4], V[5], V[6], V[7]), 0);
+    }
+
+    assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
+    assert(ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
+    unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
+    for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
+      ReplaceUses(SDValue(N, Vec),
+                  CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, RegSeq));
+    ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, NumVecs));
+    return NULL;
+  }
+
   // For a 64-bit vector load to D registers, nothing more needs to be done.
   if (is64BitVector)
     return VLdLn;
@@ -1481,6 +1826,21 @@ SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDNode *N) {
   return CurDAG->SelectNodeTo(N, Opc, VT, Ops, 5);
 }
 
+SDNode *ARMDAGToDAGISel::SelectConcatVector(SDNode *N) {
+  // The only time a CONCAT_VECTORS operation can have legal types is when
+  // two 64-bit vectors are concatenated to a 128-bit vector.
+  EVT VT = N->getValueType(0);
+  if (!VT.is128BitVector() || N->getNumOperands() != 2)
+    llvm_unreachable("unexpected CONCAT_VECTORS");
+  DebugLoc dl = N->getDebugLoc();
+  SDValue V0 = N->getOperand(0);
+  SDValue V1 = N->getOperand(1);
+  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32);
+  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32);
+  const SDValue Ops[] = { V0, SubReg0, V1, SubReg1 };
+  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 4);
+}
+
 SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
   DebugLoc dl = N->getDebugLoc();
 
@@ -1695,8 +2055,12 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
       SDValue Pred = getAL(CurDAG);
       SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
       SDValue Ops[] = { N->getOperand(1), AM5Opc, Pred, PredReg, Chain };
-      return CurDAG->getMachineNode(ARM::VLDMQ, dl, MVT::v2f64, MVT::Other,
-                                    Ops, 5);
+      MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+      MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
+      SDNode *Ret = CurDAG->getMachineNode(ARM::VLDMQ, dl,
+                                           MVT::v2f64, MVT::Other, Ops, 5);
+      cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1);
+      return Ret;
     }
     // Other cases are autogenerated.
     break;
@@ -1712,7 +2076,11 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
       SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
       SDValue Ops[] = { N->getOperand(1), N->getOperand(2),
                         AM5Opc, Pred, PredReg, Chain };
-      return CurDAG->getMachineNode(ARM::VSTMQ, dl, MVT::Other, Ops, 6);
+      MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+      MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
+      SDNode *Ret = CurDAG->getMachineNode(ARM::VSTMQ, dl, MVT::Other, Ops, 6);
+      cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1);
+      return Ret;
     }
     // Other cases are autogenerated.
     break;
@@ -1971,7 +2339,11 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
       return SelectVLDSTLane(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
     }
     }
+    break;
   }
+
+  case ISD::CONCAT_VECTORS:
+    return SelectConcatVector(N);
   }
 
   return SelectCode(N);
@@ -1995,3 +2367,9 @@ FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM,
                                      CodeGenOpt::Level OptLevel) {
   return new ARMDAGToDAGISel(TM, OptLevel);
 }
+
+/// ModelWithRegSequence - Return true if isel should use REG_SEQUENCE to model
+/// operations involving sub-registers.
+bool llvm::ModelWithRegSequence() {
+  return UseRegSeq;
+}
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index d3842a6..b8126a3 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -94,7 +94,10 @@ void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT,
   }
   setOperationAction(ISD::BUILD_VECTOR, VT.getSimpleVT(), Custom);
   setOperationAction(ISD::VECTOR_SHUFFLE, VT.getSimpleVT(), Custom);
-  setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Custom);
+  if (llvm::ModelWithRegSequence())
+    setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Legal);
+  else
+    setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Custom);
   setOperationAction(ISD::EXTRACT_SUBVECTOR, VT.getSimpleVT(), Expand);
   setOperationAction(ISD::SELECT, VT.getSimpleVT(), Expand);
   setOperationAction(ISD::SELECT_CC, VT.getSimpleVT(), Expand);
@@ -360,8 +363,11 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
     setOperationAction(ISD::BSWAP, MVT::i32, Expand);
 
   // These are expanded into libcalls.
-  setOperationAction(ISD::SDIV,  MVT::i32, Expand);
-  setOperationAction(ISD::UDIV,  MVT::i32, Expand);
+  if (!Subtarget->hasDivide()) {
+    // v7M has a hardware divider
+    setOperationAction(ISD::SDIV,  MVT::i32, Expand);
+    setOperationAction(ISD::UDIV,  MVT::i32, Expand);
+  }
   setOperationAction(ISD::SREM,  MVT::i32, Expand);
   setOperationAction(ISD::UREM,  MVT::i32, Expand);
   setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
@@ -373,6 +379,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
   setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
   setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
 
+  setOperationAction(ISD::TRAP, MVT::Other, Legal);
+
   // Use the default implementation.
   setOperationAction(ISD::VASTART,            MVT::Other, Custom);
   setOperationAction(ISD::VAARG,              MVT::Other, Expand);
@@ -387,7 +395,11 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
   setOperationAction(ISD::MEMBARRIER,         MVT::Other, Custom);
 
-  if (!Subtarget->hasV6Ops() && !Subtarget->isThumb2()) {
+  // If the subtarget does not have extract instructions, sign_extend_inreg
+  // needs to be expanded. Extract is available in ARM mode on v6 and up,
+  // and on most Thumb2 implementations.
+  if ((!Subtarget->isThumb() && !Subtarget->hasV6Ops())
+      || (Subtarget->isThumb2() && !Subtarget->hasT2ExtractPack())) {
     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8,  Expand);
   }
@@ -400,6 +412,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
 
   // We want to custom lower some of our intrinsics.
   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+  setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
+  setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
 
   setOperationAction(ISD::SETCC,     MVT::i32, Expand);
   setOperationAction(ISD::SETCC,     MVT::f32, Expand);
@@ -451,9 +465,14 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
   // ARMISD::VMOVRRD  - No need to call setTargetDAGCombine
   setTargetDAGCombine(ISD::ADD);
   setTargetDAGCombine(ISD::SUB);
+  setTargetDAGCombine(ISD::MUL);
 
   setStackPointerRegisterToSaveRestore(ARM::SP);
-  setSchedulingPreference(SchedulingForRegPressure);
+
+  if (UseSoftFloat || Subtarget->isThumb1Only() || !Subtarget->hasVFP2())
+    setSchedulingPreference(Sched::RegPressure);
+  else
+    setSchedulingPreference(Sched::Hybrid);
 
   // FIXME: If-converter should use instruction latency to determine
   // profitability rather than relying on fixed limits.
@@ -567,11 +586,35 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
   }
 }
 
+/// getRegClassFor - Return the register class that should be used for the
+/// specified value type.
+TargetRegisterClass *ARMTargetLowering::getRegClassFor(EVT VT) const {
+  // Map v4i64 to QQ registers but do not make the type legal. Similarly map
+  // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
+  // load / store 4 to 8 consecutive D registers.
+  if (Subtarget->hasNEON()) {
+    if (VT == MVT::v4i64)
+      return ARM::QQPRRegisterClass;
+    else if (VT == MVT::v8i64)
+      return ARM::QQQQPRRegisterClass;
+  }
+  return TargetLowering::getRegClassFor(VT);
+}
+
 /// getFunctionAlignment - Return the Log2 alignment of this function.
 unsigned ARMTargetLowering::getFunctionAlignment(const Function *F) const {
   return getTargetMachine().getSubtarget<ARMSubtarget>().isThumb() ? 0 : 1;
 }
 
+Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
+  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
+    EVT VT = N->getValueType(i);
+    if (VT.isFloatingPoint() || VT.isVector())
+      return Sched::Latency;
+  }
+  return Sched::RegPressure;
+}
+
 //===----------------------------------------------------------------------===//
 // Lowering Code
 //===----------------------------------------------------------------------===//
@@ -1507,6 +1550,23 @@ SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
 }
 
 SDValue
+ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
+  DebugLoc dl = Op.getDebugLoc();
+  SDValue Val = Subtarget->isThumb() ?
+    DAG.getCopyFromReg(DAG.getEntryNode(), dl, ARM::SP, MVT::i32) :
+    DAG.getConstant(0, MVT::i32);
+  return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl, MVT::i32, Op.getOperand(0),
+                     Op.getOperand(1), Val);
+}
+
+SDValue
+ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {
+  DebugLoc dl = Op.getDebugLoc();
+  return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0),
+                     Op.getOperand(1), DAG.getConstant(0, MVT::i32));
+}
+
+SDValue
 ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
                                            const ARMSubtarget *Subtarget)
                                              const {
@@ -1545,12 +1605,6 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
     }
     return Result;
   }
-  case Intrinsic::eh_sjlj_setjmp:
-    SDValue Val = Subtarget->isThumb() ?
-      DAG.getCopyFromReg(DAG.getEntryNode(), dl, ARM::SP, MVT::i32) :
-      DAG.getConstant(0, MVT::i32);
-    return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl, MVT::i32, Op.getOperand(1),
-                       Val);
   }
 }
 
@@ -1652,7 +1706,7 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
     RC = ARM::GPRRegisterClass;
 
   // Transform the arguments stored in physical registers into virtual ones.
-  unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
+  unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); 
   SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
 
   SDValue ArgValue2;
@@ -2092,9 +2146,31 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
   return DAG.getNode(ARMISD::CNEG, dl, VT, AbsVal, AbsVal, ARMCC, CCR, Cmp);
 }
 
+SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
+  MachineFunction &MF = DAG.getMachineFunction();
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  MFI->setReturnAddressIsTaken(true);
+
+  EVT VT = Op.getValueType();
+  DebugLoc dl = Op.getDebugLoc();
+  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+  if (Depth) {
+    SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
+    SDValue Offset = DAG.getConstant(4, MVT::i32);
+    return DAG.getLoad(VT, dl, DAG.getEntryNode(),
+                       DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
+                       NULL, 0, false, false, 0);
+  }
+
+  // Return LR, which contains the return address. Mark it an implicit live-in.
+  unsigned Reg = MF.addLiveIn(ARM::LR, ARM::GPRRegisterClass); 
+  return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
+}
+
 SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
   MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
   MFI->setFrameAddressIsTaken(true);
+
   EVT VT = Op.getValueType();
   DebugLoc dl = Op.getDebugLoc();  // FIXME probably not meaningful
   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
@@ -2107,116 +2183,6 @@ SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
   return FrameAddr;
 }
 
-SDValue
-ARMTargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
-                                           SDValue Chain,
-                                           SDValue Dst, SDValue Src,
-                                           SDValue Size, unsigned Align,
-                                           bool isVolatile, bool AlwaysInline,
-                                           const Value *DstSV,
-                                           uint64_t DstSVOff,
-                                           const Value *SrcSV,
-                                           uint64_t SrcSVOff) const {
-  // Do repeated 4-byte loads and stores. To be improved.
-  // This requires 4-byte alignment.
-  if ((Align & 3) != 0)
-    return SDValue();
-  // This requires the copy size to be a constant, preferrably
-  // within a subtarget-specific limit.
-  ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
-  if (!ConstantSize)
-    return SDValue();
-  uint64_t SizeVal = ConstantSize->getZExtValue();
-  if (!AlwaysInline && SizeVal > getSubtarget()->getMaxInlineSizeThreshold())
-    return SDValue();
-
-  unsigned BytesLeft = SizeVal & 3;
-  unsigned NumMemOps = SizeVal >> 2;
-  unsigned EmittedNumMemOps = 0;
-  EVT VT = MVT::i32;
-  unsigned VTSize = 4;
-  unsigned i = 0;
-  const unsigned MAX_LOADS_IN_LDM = 6;
-  SDValue TFOps[MAX_LOADS_IN_LDM];
-  SDValue Loads[MAX_LOADS_IN_LDM];
-  uint64_t SrcOff = 0, DstOff = 0;
-
-  // Emit up to MAX_LOADS_IN_LDM loads, then a TokenFactor barrier, then the
-  // same number of stores.  The loads and stores will get combined into
-  // ldm/stm later on.
-  while (EmittedNumMemOps < NumMemOps) {
-    for (i = 0;
-         i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) {
-      Loads[i] = DAG.getLoad(VT, dl, Chain,
-                             DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
-                                         DAG.getConstant(SrcOff, MVT::i32)),
-                             SrcSV, SrcSVOff + SrcOff, isVolatile, false, 0);
-      TFOps[i] = Loads[i].getValue(1);
-      SrcOff += VTSize;
-    }
-    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
-
-    for (i = 0;
-         i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) {
-      TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
-                              DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
-                                          DAG.getConstant(DstOff, MVT::i32)),
-                              DstSV, DstSVOff + DstOff, isVolatile, false, 0);
-      DstOff += VTSize;
-    }
-    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
-
-    EmittedNumMemOps += i;
-  }
-
-  if (BytesLeft == 0)
-    return Chain;
-
-  // Issue loads / stores for the trailing (1 - 3) bytes.
-  unsigned BytesLeftSave = BytesLeft;
-  i = 0;
-  while (BytesLeft) {
-    if (BytesLeft >= 2) {
-      VT = MVT::i16;
-      VTSize = 2;
-    } else {
-      VT = MVT::i8;
-      VTSize = 1;
-    }
-
-    Loads[i] = DAG.getLoad(VT, dl, Chain,
-                           DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
-                                       DAG.getConstant(SrcOff, MVT::i32)),
-                           SrcSV, SrcSVOff + SrcOff, false, false, 0);
-    TFOps[i] = Loads[i].getValue(1);
-    ++i;
-    SrcOff += VTSize;
-    BytesLeft -= VTSize;
-  }
-  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
-
-  i = 0;
-  BytesLeft = BytesLeftSave;
-  while (BytesLeft) {
-    if (BytesLeft >= 2) {
-      VT = MVT::i16;
-      VTSize = 2;
-    } else {
-      VT = MVT::i8;
-      VTSize = 1;
-    }
-
-    TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
-                            DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
-                                        DAG.getConstant(DstOff, MVT::i32)),
-                            DstSV, DstSVOff + DstOff, false, false, 0);
-    ++i;
-    DstOff += VTSize;
-    BytesLeft -= VTSize;
-  }
-  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
-}
-
 /// ExpandBIT_CONVERT - If the target supports VFP, this function is called to
 /// expand a bit convert where either the source or destination type is i64 to
 /// use a VMOVDRR or VMOVRRD node.  This should not be done when the non-i64
@@ -2434,9 +2400,9 @@ static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
 
   // Okay, we have a 64-bit SRA or SRL of 1.  Lower this to an RRX expr.
   SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
-                             DAG.getConstant(0, MVT::i32));
+                           DAG.getConstant(0, MVT::i32));
   SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
-                             DAG.getConstant(1, MVT::i32));
+                           DAG.getConstant(1, MVT::i32));
 
   // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and
   // captures the result into a carry flag.
@@ -2879,21 +2845,60 @@ static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
     }
   }
 
-  // If there are only 2 elements in a 128-bit vector, insert them into an
-  // undef vector.  This handles the common case for 128-bit vector argument
-  // passing, where the insertions should be translated to subreg accesses
-  // with no real instructions.
-  if (VT.is128BitVector() && Op.getNumOperands() == 2) {
-    SDValue Val = DAG.getUNDEF(VT);
-    SDValue Op0 = Op.getOperand(0);
-    SDValue Op1 = Op.getOperand(1);
-    if (Op0.getOpcode() != ISD::UNDEF)
-      Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Val, Op0,
-                        DAG.getIntPtrConstant(0));
-    if (Op1.getOpcode() != ISD::UNDEF)
-      Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Val, Op1,
-                        DAG.getIntPtrConstant(1));
-    return Val;
+  // Scan through the operands to see if only one value is used.
+  unsigned NumElts = VT.getVectorNumElements();
+  bool isOnlyLowElement = true;
+  bool usesOnlyOneValue = true;
+  bool isConstant = true;
+  SDValue Value;
+  for (unsigned i = 0; i < NumElts; ++i) {
+    SDValue V = Op.getOperand(i);
+    if (V.getOpcode() == ISD::UNDEF)
+      continue;
+    if (i > 0)
+      isOnlyLowElement = false;
+    if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
+      isConstant = false;
+
+    if (!Value.getNode())
+      Value = V;
+    else if (V != Value)
+      usesOnlyOneValue = false;
+  }
+
+  if (!Value.getNode())
+    return DAG.getUNDEF(VT);
+
+  if (isOnlyLowElement)
+    return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
+
+  // If all elements are constants, fall back to the default expansion, which
+  // will generate a load from the constant pool.
+  if (isConstant)
+    return SDValue();
+
+  // Use VDUP for non-constant splats.
+  unsigned EltSize = VT.getVectorElementType().getSizeInBits();
+  if (usesOnlyOneValue && EltSize <= 32)
+    return DAG.getNode(ARMISD::VDUP, dl, VT, Value);
+
+  // Vectors with 32- or 64-bit elements can be built by directly assigning
+  // the subregisters.
+  if (EltSize >= 32) {
+    // Do the expansion with floating-point types, since that is what the VFP
+    // registers are defined to use, and since i64 is not legal.
+    EVT EltVT = EVT::getFloatingPointVT(EltSize);
+    EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
+    SDValue Val = DAG.getUNDEF(VecVT);
+    for (unsigned i = 0; i < NumElts; ++i) {
+      SDValue Elt = Op.getOperand(i);
+      if (Elt.getOpcode() == ISD::UNDEF)
+        continue;
+      Elt = DAG.getNode(ISD::BIT_CONVERT, dl, EltVT, Elt);
+      Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecVT, Val, Elt,
+                        DAG.getConstant(i, MVT::i32));
+    }
+    return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Val);
   }
 
   return SDValue();
@@ -3083,8 +3088,8 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
 
   // If the shuffle is not directly supported and it has 4 elements, use
   // the PerfectShuffle-generated table to synthesize it from other shuffles.
-  if (VT.getVectorNumElements() == 4 &&
-      (VT.is128BitVector() || VT.is64BitVector())) {
+  unsigned NumElts = VT.getVectorNumElements();
+  if (NumElts == 4) {
     unsigned PFIndexes[4];
     for (unsigned i = 0; i != 4; ++i) {
       if (ShuffleMask[i] < 0)
@@ -3096,7 +3101,6 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
     // Compute the index in the perfect shuffle table.
     unsigned PFTableIndex =
       PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
-
     unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
     unsigned Cost = (PFEntry >> 30);
 
@@ -3104,6 +3108,29 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
       return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
   }
 
+  // Implement shuffles with 32- or 64-bit elements as subreg copies.
+  unsigned EltSize = VT.getVectorElementType().getSizeInBits();
+  if (EltSize >= 32) {
+    // Do the expansion with floating-point types, since that is what the VFP
+    // registers are defined to use, and since i64 is not legal.
+    EVT EltVT = EVT::getFloatingPointVT(EltSize);
+    EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
+    V1 = DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, V1);
+    V2 = DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, V2);
+    SDValue Val = DAG.getUNDEF(VecVT);
+    for (unsigned i = 0; i < NumElts; ++i) {
+      if (ShuffleMask[i] < 0)
+        continue;
+      SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
+                                ShuffleMask[i] < (int)NumElts ? V1 : V2,
+                                DAG.getConstant(ShuffleMask[i] & (NumElts-1),
+                                                MVT::i32));
+      Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecVT, Val,
+                        Elt, DAG.getConstant(i, MVT::i32));
+    }
+    return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Val);
+  }
+
   return SDValue();
 }
 
@@ -3158,9 +3185,11 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::FP_TO_SINT:
   case ISD::FP_TO_UINT:    return LowerFP_TO_INT(Op, DAG);
   case ISD::FCOPYSIGN:     return LowerFCOPYSIGN(Op, DAG);
-  case ISD::RETURNADDR:    break;
+  case ISD::RETURNADDR:    return LowerRETURNADDR(Op, DAG);
   case ISD::FRAMEADDR:     return LowerFRAMEADDR(Op, DAG);
   case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG);
+  case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG);
+  case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG);
   case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG,
                                                                Subtarget);
   case ISD::BIT_CONVERT:   return ExpandBIT_CONVERT(Op.getNode(), DAG);
@@ -3667,6 +3696,62 @@ static SDValue PerformSUBCombine(SDNode *N,
   return SDValue();
 }
 
+static SDValue PerformMULCombine(SDNode *N,
+                                 TargetLowering::DAGCombinerInfo &DCI,
+                                 const ARMSubtarget *Subtarget) {
+  SelectionDAG &DAG = DCI.DAG;
+
+  if (Subtarget->isThumb1Only())
+    return SDValue();
+
+  if (DAG.getMachineFunction().
+      getFunction()->hasFnAttr(Attribute::OptimizeForSize))
+    return SDValue();
+
+  if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
+    return SDValue();
+
+  EVT VT = N->getValueType(0);
+  if (VT != MVT::i32)
+    return SDValue();
+
+  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
+  if (!C)
+    return SDValue();
+
+  uint64_t MulAmt = C->getZExtValue();
+  unsigned ShiftAmt = CountTrailingZeros_64(MulAmt);
+  ShiftAmt = ShiftAmt & (32 - 1);
+  SDValue V = N->getOperand(0);
+  DebugLoc DL = N->getDebugLoc();
+
+  SDValue Res;
+  MulAmt >>= ShiftAmt;
+  if (isPowerOf2_32(MulAmt - 1)) {
+    // (mul x, 2^N + 1) => (add (shl x, N), x)
+    Res = DAG.getNode(ISD::ADD, DL, VT,
+                      V, DAG.getNode(ISD::SHL, DL, VT,
+                                     V, DAG.getConstant(Log2_32(MulAmt-1),
+                                                        MVT::i32)));
+  } else if (isPowerOf2_32(MulAmt + 1)) {
+    // (mul x, 2^N - 1) => (sub (shl x, N), x)
+    Res = DAG.getNode(ISD::SUB, DL, VT,
+                      DAG.getNode(ISD::SHL, DL, VT,
+                                  V, DAG.getConstant(Log2_32(MulAmt+1),
+                                                     MVT::i32)),
+                                                     V);
+  } else
+    return SDValue();
+
+  if (ShiftAmt != 0)
+    Res = DAG.getNode(ISD::SHL, DL, VT, Res,
+                      DAG.getConstant(ShiftAmt, MVT::i32));
+
+  // Do not add new nodes to DAG combiner worklist.
+  DCI.CombineTo(N, Res, false);
+  return SDValue();
+}
+
 /// PerformVMOVRRDCombine - Target-specific dag combine xforms for
 /// ARMISD::VMOVRRD.
 static SDValue PerformVMOVRRDCombine(SDNode *N,
@@ -4053,6 +4138,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
   default: break;
   case ISD::ADD:        return PerformADDCombine(N, DCI);
   case ISD::SUB:        return PerformSUBCombine(N, DCI);
+  case ISD::MUL:        return PerformMULCombine(N, DCI, Subtarget);
   case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI);
   case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG);
   case ISD::SHL:
@@ -4432,9 +4518,11 @@ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
   bool isSEXTLoad = false;
   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
     VT  = LD->getMemoryVT();
+    Ptr = LD->getBasePtr();
     isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
   } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
     VT  = ST->getMemoryVT();
+    Ptr = ST->getBasePtr();
   } else
     return false;
 
@@ -4442,13 +4530,25 @@ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
   bool isLegal = false;
   if (Subtarget->isThumb2())
     isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
-                                        isInc, DAG);
+                                       isInc, DAG);
   else
     isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
                                         isInc, DAG);
   if (!isLegal)
     return false;
 
+  if (Ptr != Base) {
+    // Swap base ptr and offset to catch more post-index load / store when
+    // it's legal. In Thumb2 mode, offset must be an immediate.
+    if (Ptr == Offset && Op->getOpcode() == ISD::ADD &&
+        !Subtarget->isThumb2())
+      std::swap(Base, Offset);
+
+    // Post-indexed load / store update the base pointer.
+    if (Ptr != Base)
+      return false;
+  }
+
   AM = isInc ? ISD::POST_INC : ISD::POST_DEC;
   return true;
 }
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index d8a230f..9c7517c 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -236,13 +236,19 @@ namespace llvm {
                                               std::vector<SDValue> &Ops,
                                               SelectionDAG &DAG) const;
 
-    virtual const ARMSubtarget* getSubtarget() const {
+    const ARMSubtarget* getSubtarget() const {
       return Subtarget;
     }
 
+    /// getRegClassFor - Return the register class that should be used for the
+    /// specified value type.
+    virtual TargetRegisterClass *getRegClassFor(EVT VT) const;
+
     /// getFunctionAlignment - Return the Log2 alignment of this function.
     virtual unsigned getFunctionAlignment(const Function *F) const;
 
+    Sched::Preference getSchedulingPreference(SDNode *N) const;
+
     bool isShuffleMaskLegal(const SmallVectorImpl<int> &M, EVT VT) const;
     bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
 
@@ -281,7 +287,8 @@ namespace llvm {
                              DebugLoc dl, SelectionDAG &DAG,
                              const CCValAssign &VA,
                              ISD::ArgFlagsTy Flags) const;
-    SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
                                     const ARMSubtarget *Subtarget) const;
     SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
@@ -296,20 +303,12 @@ namespace llvm {
     SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
 
-    SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
-                                      SDValue Chain,
-                                      SDValue Dst, SDValue Src,
-                                      SDValue Size, unsigned Align,
-                                      bool isVolatile, bool AlwaysInline,
-                                      const Value *DstSV,
-                                      uint64_t DstSVOff,
-                                      const Value *SrcSV,
-                                      uint64_t SrcSVOff) const;
     SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
                             CallingConv::ID CallConv, bool isVarArg,
                             const SmallVectorImpl<ISD::InputArg> &Ins,
diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td
index b466d0d..d487df1 100644
--- a/lib/Target/ARM/ARMInstrFormats.td
+++ b/lib/Target/ARM/ARMInstrFormats.td
@@ -228,7 +228,7 @@ class PseudoInst<dag oops, dag iops, InstrItinClass itin,
             "", itin> {
   let OutOperandList = oops;
   let InOperandList = iops;
-  let AsmString   = asm;
+  let AsmString = asm;
   let Pattern = pattern;
 }
 
@@ -240,7 +240,7 @@ class I<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
   : InstARM<am, sz, im, f, GenericDomain, cstr, itin> {
   let OutOperandList = oops;
   let InOperandList = !con(iops, (ins pred:$p));
-  let AsmString   = !strconcat(opc, !strconcat("${p}", asm));
+  let AsmString = !strconcat(opc, !strconcat("${p}", asm));
   let Pattern = pattern;
   list<Predicate> Predicates = [IsARM];
 }
@@ -252,7 +252,7 @@ class InoP<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
   : InstARM<am, sz, im, f, GenericDomain, cstr, itin> {
   let OutOperandList = oops;
   let InOperandList = iops;
-  let AsmString   = !strconcat(opc, asm);
+  let AsmString = !strconcat(opc, asm);
   let Pattern = pattern;
   let isPredicable = 0;
   list<Predicate> Predicates = [IsARM];
@@ -268,7 +268,7 @@ class sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
   : InstARM<am, sz, im, f, GenericDomain, cstr, itin> {
   let OutOperandList = oops;
   let InOperandList = !con(iops, (ins pred:$p, cc_out:$s));
-  let AsmString   = !strconcat(opc, !strconcat("${p}${s}", asm));
+  let AsmString = !strconcat(opc, !strconcat("${p}${s}", asm));
   let Pattern = pattern;
   list<Predicate> Predicates = [IsARM];
 }
@@ -280,7 +280,7 @@ class XI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
   : InstARM<am, sz, im, f, GenericDomain, cstr, itin> {
   let OutOperandList = oops;
   let InOperandList = iops;
-  let AsmString   = asm;
+  let AsmString = asm;
   let Pattern = pattern;
   list<Predicate> Predicates = [IsARM];
 }
@@ -959,7 +959,7 @@ class ThumbI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
   : InstThumb<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
   let OutOperandList = oops;
   let InOperandList = iops;
-  let AsmString   = asm;
+  let AsmString = asm;
   let Pattern = pattern;
   list<Predicate> Predicates = [IsThumb];
 }
@@ -995,7 +995,7 @@ class Thumb1I<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
   : InstThumb<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
   let OutOperandList = oops;
   let InOperandList = iops;
-  let AsmString   = asm;
+  let AsmString = asm;
   let Pattern = pattern;
   list<Predicate> Predicates = [IsThumb1Only];
 }
@@ -1140,7 +1140,7 @@ class Thumb2sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
   : InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
   let OutOperandList = oops;
   let InOperandList = !con(iops, (ins pred:$p, cc_out:$s));
-  let AsmString   = !strconcat(opc, !strconcat("${s}${p}", asm));
+  let AsmString = !strconcat(opc, !strconcat("${s}${p}", asm));
   let Pattern = pattern;
   list<Predicate> Predicates = [IsThumb2];
 }
@@ -1152,7 +1152,7 @@ class Thumb2XI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
   : InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
   let OutOperandList = oops;
   let InOperandList = iops;
-  let AsmString   = asm;
+  let AsmString = asm;
   let Pattern = pattern;
   list<Predicate> Predicates = [IsThumb2];
 }
@@ -1163,7 +1163,7 @@ class ThumbXI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
   : InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
   let OutOperandList = oops;
   let InOperandList = iops;
-  let AsmString   = asm;
+  let AsmString = asm;
   let Pattern = pattern;
   list<Predicate> Predicates = [IsThumb1Only];
 }
@@ -1280,7 +1280,7 @@ class VFPI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
   : InstARM<am, sz, im, f, VFPDomain, cstr, itin> {
   let OutOperandList = oops;
   let InOperandList = !con(iops, (ins pred:$p));
-  let AsmString   = !strconcat(opc, !strconcat("${p}", asm));
+  let AsmString = !strconcat(opc, !strconcat("${p}", asm));
   let Pattern = pattern;
   list<Predicate> Predicates = [HasVFP2];
 }
@@ -1292,7 +1292,7 @@ class VFPXI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
   : InstARM<am, sz, im, f, VFPDomain, cstr, itin> {
   let OutOperandList = oops;
   let InOperandList = iops;
-  let AsmString   = asm;
+  let AsmString = asm;
   let Pattern = pattern;
   list<Predicate> Predicates = [HasVFP2];
 }
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index ce5f2f8..f3156d9 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -46,6 +46,7 @@ def SDT_ARMPICAdd  : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>,
 def SDT_ARMThreadPointer : SDTypeProfile<1, 0, [SDTCisPtrTy<0>]>;
 def SDT_ARMEH_SJLJ_Setjmp : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisPtrTy<1>,
                                                  SDTCisInt<2>]>;
+def SDT_ARMEH_SJLJ_Longjmp: SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisInt<1>]>;
 
 def SDT_ARMMEMBARRIERV7  : SDTypeProfile<0, 0, []>;
 def SDT_ARMSYNCBARRIERV7 : SDTypeProfile<0, 0, []>;
@@ -100,7 +101,10 @@ def ARMsra_flag      : SDNode<"ARMISD::SRA_FLAG", SDTIntUnaryOp, [SDNPOutFlag]>;
 def ARMrrx           : SDNode<"ARMISD::RRX"     , SDTIntUnaryOp, [SDNPInFlag ]>;
 
 def ARMthread_pointer: SDNode<"ARMISD::THREAD_POINTER", SDT_ARMThreadPointer>;
-def ARMeh_sjlj_setjmp: SDNode<"ARMISD::EH_SJLJ_SETJMP", SDT_ARMEH_SJLJ_Setjmp>;
+def ARMeh_sjlj_setjmp: SDNode<"ARMISD::EH_SJLJ_SETJMP",
+                               SDT_ARMEH_SJLJ_Setjmp, [SDNPHasChain]>;
+def ARMeh_sjlj_longjmp: SDNode<"ARMISD::EH_SJLJ_LONGJMP",
+                                SDT_ARMEH_SJLJ_Longjmp, [SDNPHasChain]>;
 
 def ARMMemBarrierV7  : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIERV7,
                               [SDNPHasChain]>;
@@ -128,6 +132,8 @@ def NoVFP     : Predicate<"!Subtarget->hasVFP2()">;
 def HasVFP2   : Predicate<"Subtarget->hasVFP2()">;
 def HasVFP3   : Predicate<"Subtarget->hasVFP3()">;
 def HasNEON   : Predicate<"Subtarget->hasNEON()">;
+def HasDivide : Predicate<"Subtarget->hasDivide()">;
+def HasT2ExtractPack : Predicate<"Subtarget->hasT2ExtractPack()">;
 def UseNEONForFP : Predicate<"Subtarget->useNEONForSinglePrecisionFP()">;
 def DontUseNEONForFP : Predicate<"!Subtarget->useNEONForSinglePrecisionFP()">;
 def IsThumb   : Predicate<"Subtarget->isThumb()">;
@@ -654,12 +660,12 @@ PseudoInst<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx,
 let Defs = [SP], Uses = [SP], hasSideEffects = 1 in {
 def ADJCALLSTACKUP :
 PseudoInst<(outs), (ins i32imm:$amt1, i32imm:$amt2, pred:$p), NoItinerary,
-           "@ ADJCALLSTACKUP $amt1",
+           "${:comment} ADJCALLSTACKUP $amt1",
            [(ARMcallseq_end timm:$amt1, timm:$amt2)]>;
 
 def ADJCALLSTACKDOWN :
 PseudoInst<(outs), (ins i32imm:$amt, pred:$p), NoItinerary,
-           "@ ADJCALLSTACKDOWN $amt",
+           "${:comment} ADJCALLSTACKDOWN $amt",
            [(ARMcallseq_start timm:$amt)]>;
 }
 
@@ -789,8 +795,11 @@ def DBG : AI<(outs), (ins i32imm:$opt), MiscFrm, NoItinerary, "dbg", "\t$opt",
 }
 
 // A5.4 Permanently UNDEFINED instructions.
-def TRAP : AI<(outs), (ins), MiscFrm, NoItinerary, "trap", "",
-              [/* For disassembly only; pattern left blank */]>,
+// FIXME: Temporary emitted as raw bytes until this pseudo-op will be added to
+// binutils
+let isBarrier = 1, isTerminator = 1 in
+def TRAP : AXI<(outs), (ins), MiscFrm, NoItinerary, 
+               ".long 0xe7ffdefe ${:comment} trap", [(trap)]>,
            Requires<[IsARM]> {
   let Inst{27-25} = 0b011;
   let Inst{24-20} = 0b11111;
@@ -843,25 +852,19 @@ def PICSTRB : AXI2stb<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p),
 
 // LEApcrel - Load a pc-relative address into a register without offending the
 // assembler.
+let neverHasSideEffects = 1 in {
+let isReMaterializable = 1 in
 def LEApcrel : AXI1<0x0, (outs GPR:$dst), (ins i32imm:$label, pred:$p),
                     Pseudo, IIC_iALUi,
-           !strconcat(!strconcat(".set ${:private}PCRELV${:uid}, ($label-(",
-                                 "${:private}PCRELL${:uid}+8))\n"),
-                      !strconcat("${:private}PCRELL${:uid}:\n\t",
-                                 "add$p\t$dst, pc, #${:private}PCRELV${:uid}")),
-                   []>;
+                    "adr$p\t$dst, #$label", []>;
 
 def LEApcrelJT : AXI1<0x0, (outs GPR:$dst),
                            (ins i32imm:$label, nohash_imm:$id, pred:$p),
-          Pseudo, IIC_iALUi,
-   !strconcat(!strconcat(".set ${:private}PCRELV${:uid}, "
-                         "(${label}_${id}-(",
-                                  "${:private}PCRELL${:uid}+8))\n"),
-                       !strconcat("${:private}PCRELL${:uid}:\n\t",
-                                 "add$p\t$dst, pc, #${:private}PCRELV${:uid}")),
-                   []> {
+                      Pseudo, IIC_iALUi,
+                      "adr$p\t$dst, #${label}_${id}", []> {
     let Inst{25} = 1;
 }
+} // neverHasSideEffects
 
 //===----------------------------------------------------------------------===//
 //  Control Flow Instructions.
@@ -1134,7 +1137,8 @@ def LDR  : AI2ldw<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm, IIC_iLoadr,
                [(set GPR:$dst, (load addrmode2:$addr))]>;
 
 // Special LDR for loads from non-pc-relative constpools.
-let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1 in
+let canFoldAsLoad = 1, mayLoad = 1, neverHasSideEffects = 1,
+    isReMaterializable = 1 in
 def LDRcp : AI2ldw<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm, IIC_iLoadr,
                  "ldr", "\t$dst, $addr", []>;
 
@@ -1156,7 +1160,7 @@ def LDRSB : AI3ldsb<(outs GPR:$dst), (ins addrmode3:$addr), LdMiscFrm,
                    IIC_iLoadr, "ldrsb", "\t$dst, $addr",
                    [(set GPR:$dst, (sextloadi8 addrmode3:$addr))]>;
 
-let mayLoad = 1, hasExtraDefRegAllocReq = 1 in {
+let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
 // Load doubleword
 def LDRD : AI3ldd<(outs GPR:$dst1, GPR:$dst2), (ins addrmode3:$addr), LdMiscFrm,
                  IIC_iLoadr, "ldrd", "\t$dst1, $addr",
@@ -1215,7 +1219,7 @@ def LDRD_POST : AI3lddpo<(outs GPR:$dst1, GPR:$dst2, GPR:$base_wb),
             "ldrd", "\t$dst1, $dst2, [$base], $offset", "$base = $base_wb", []>,
                 Requires<[IsARM, HasV5TE]>;
 
-}
+} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1
 
 // LDRT, LDRBT, LDRSBT, LDRHT, LDRSHT are for disassembly only.
 
@@ -1264,7 +1268,7 @@ def STRB : AI2stb<(outs), (ins GPR:$src, addrmode2:$addr), StFrm, IIC_iStorer,
                [(truncstorei8 GPR:$src, addrmode2:$addr)]>;
 
 // Store doubleword
-let mayStore = 1, hasExtraSrcRegAllocReq = 1 in
+let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in
 def STRD : AI3std<(outs), (ins GPR:$src1, GPR:$src2, addrmode3:$addr),
                StMiscFrm, IIC_iStorer,
                "strd", "\t$src1, $addr", []>, Requires<[IsARM, HasV5TE]>;
@@ -1356,7 +1360,7 @@ def STRHT: AI3sthpo<(outs GPR:$base_wb),
 //  Load / store multiple Instructions.
 //
 
-let mayLoad = 1, hasExtraDefRegAllocReq = 1 in {
+let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
 def LDM : AXI4ld<(outs), (ins addrmode4:$addr, pred:$p,
                           reglist:$dsts, variable_ops),
                  IndexModeNone, LdStMulFrm, IIC_iLoadm,
@@ -1367,9 +1371,9 @@ def LDM_UPD : AXI4ld<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
                      IndexModeUpd, LdStMulFrm, IIC_iLoadm,
                      "ldm${addr:submode}${p}\t$addr!, $dsts",
                      "$addr.addr = $wb", []>;
-} // mayLoad, hasExtraDefRegAllocReq
+} // mayLoad, neverHasSideEffects, hasExtraDefRegAllocReq
 
-let mayStore = 1, hasExtraSrcRegAllocReq = 1 in {
+let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
 def STM : AXI4st<(outs), (ins addrmode4:$addr, pred:$p,
                           reglist:$srcs, variable_ops),
                  IndexModeNone, LdStMulFrm, IIC_iStorem,
@@ -1380,7 +1384,7 @@ def STM_UPD : AXI4st<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
                      IndexModeUpd, LdStMulFrm, IIC_iStorem,
                      "stm${addr:submode}${p}\t$addr!, $srcs",
                      "$addr.addr = $wb", []>;
-} // mayStore, hasExtraSrcRegAllocReq
+} // mayStore, neverHasSideEffects, hasExtraSrcRegAllocReq
 
 //===----------------------------------------------------------------------===//
 //  Move Instructions.
@@ -2198,6 +2202,7 @@ def : ARMPat<(ARMcmpZ GPR:$src, so_imm_neg:$imm),
 // Conditional moves
 // FIXME: should be able to write a pattern for ARMcmov, but can't use
 // a two-value operand where a dag node expects two operands. :(
+let neverHasSideEffects = 1 in {
 def MOVCCr : AI1<0b1101, (outs GPR:$dst), (ins GPR:$false, GPR:$true), DPFrm,
                 IIC_iCMOVr, "mov", "\t$dst, $true",
       [/*(set GPR:$dst, (ARMcmov GPR:$false, GPR:$true, imm:$cc, CCR:$ccr))*/]>,
@@ -2221,6 +2226,7 @@ def MOVCCi : AI1<0b1101, (outs GPR:$dst),
                 RegConstraint<"$false = $dst">, UnaryDP {
   let Inst{25} = 1;
 }
+} // neverHasSideEffects
 
 //===----------------------------------------------------------------------===//
 // Atomic operations intrinsics
@@ -2528,12 +2534,12 @@ let Defs =
   def Int_eh_sjlj_setjmp : XI<(outs), (ins GPR:$src, GPR:$val),
                                AddrModeNone, SizeSpecial, IndexModeNone,
                                Pseudo, NoItinerary,
-                               "str\tsp, [$src, #+8] @ eh_setjmp begin\n\t"
+                               "str\tsp, [$src, #+8] ${:comment} eh_setjmp begin\n\t"
                                "add\t$val, pc, #8\n\t"
                                "str\t$val, [$src, #+4]\n\t"
                                "mov\tr0, #0\n\t"
                                "add\tpc, pc, #0\n\t"
-                               "mov\tr0, #1 @ eh_setjmp end", "",
+                               "mov\tr0, #1 ${:comment} eh_setjmp end", "",
                          [(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>,
                            Requires<[IsARM, HasVFP2]>;
 }
@@ -2543,16 +2549,30 @@ let Defs =
   def Int_eh_sjlj_setjmp_nofp : XI<(outs), (ins GPR:$src, GPR:$val),
                                    AddrModeNone, SizeSpecial, IndexModeNone,
                                    Pseudo, NoItinerary,
-                                   "str\tsp, [$src, #+8] @ eh_setjmp begin\n\t"
+                                   "str\tsp, [$src, #+8] ${:comment} eh_setjmp begin\n\t"
                                    "add\t$val, pc, #8\n\t"
                                    "str\t$val, [$src, #+4]\n\t"
                                    "mov\tr0, #0\n\t"
                                    "add\tpc, pc, #0\n\t"
-                                   "mov\tr0, #1 @ eh_setjmp end", "",
+                                   "mov\tr0, #1 ${:comment} eh_setjmp end", "",
                          [(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>,
                                 Requires<[IsARM, NoVFP]>;
 }
 
+// FIXME: Non-Darwin version(s)
+let isBarrier = 1, hasSideEffects = 1, isTerminator = 1,
+    Defs = [ R7, LR, SP ] in {
+def Int_eh_sjlj_longjmp : XI<(outs), (ins GPR:$src, GPR:$scratch),
+                             AddrModeNone, SizeSpecial, IndexModeNone,
+                             Pseudo, NoItinerary,
+                             "ldr\tsp, [$src, #8]\n\t"
+                             "ldr\t$scratch, [$src, #4]\n\t"
+                             "ldr\tr7, [$src]\n\t"
+                             "bx\t$scratch", "",
+                         [(ARMeh_sjlj_longjmp GPR:$src, GPR:$scratch)]>,
+                                Requires<[IsARM, IsDarwin]>;
+}
+
 //===----------------------------------------------------------------------===//
 // Non-Instruction Patterns
 //
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index d5ce2b8..197ec16 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -115,7 +115,7 @@ def h64imm : Operand<i64> {
 // NEON load / store instructions
 //===----------------------------------------------------------------------===//
 
-let mayLoad = 1 in {
+let mayLoad = 1, neverHasSideEffects = 1 in {
 // Use vldmia to load a Q register as a D register pair.
 // This is equivalent to VLDMD except that it has a Q register operand
 // instead of a pair of D registers.
@@ -123,11 +123,6 @@ def VLDMQ
   : AXDI5<(outs QPR:$dst), (ins addrmode5:$addr, pred:$p),
           IndexModeNone, IIC_fpLoadm,
           "vldm${addr:submode}${p}\t${addr:base}, ${dst:dregpair}", "", []>;
-def VLDMQ_UPD
-  : AXDI5<(outs QPR:$dst, GPR:$wb), (ins addrmode5:$addr, pred:$p),
-          IndexModeUpd, IIC_fpLoadm,
-          "vldm${addr:submode}${p}\t${addr:base}!, ${dst:dregpair}",
-          "$addr.base = $wb", []>;
 
 // Use vld1 to load a Q register as a D register pair.
 // This alternative to VLDMQ allows an alignment to be specified.
@@ -135,13 +130,9 @@ def VLDMQ_UPD
 def VLD1q
   : NLdSt<0,0b10,0b1010,0b1100, (outs QPR:$dst), (ins addrmode6:$addr),
           IIC_VLD1, "vld1", "64", "${dst:dregpair}, $addr", "", []>;
-def VLD1q_UPD
-  : NLdSt<0,0b10,0b1010,0b1100, (outs QPR:$dst, GPR:$wb),
-          (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1, "vld1", "64",
-          "${dst:dregpair}, $addr$offset", "$addr.addr = $wb", []>;
-} // mayLoad = 1
+} // mayLoad = 1, neverHasSideEffects = 1
 
-let mayStore = 1 in {
+let mayStore = 1, neverHasSideEffects = 1 in {
 // Use vstmia to store a Q register as a D register pair.
 // This is equivalent to VSTMD except that it has a Q register operand
 // instead of a pair of D registers.
@@ -149,11 +140,6 @@ def VSTMQ
   : AXDI5<(outs), (ins QPR:$src, addrmode5:$addr, pred:$p),
           IndexModeNone, IIC_fpStorem,
           "vstm${addr:submode}${p}\t${addr:base}, ${src:dregpair}", "", []>;
-def VSTMQ_UPD
-  : AXDI5<(outs GPR:$wb), (ins QPR:$src, addrmode5:$addr, pred:$p),
-          IndexModeUpd, IIC_fpStorem,
-          "vstm${addr:submode}${p}\t${addr:base}!, ${src:dregpair}",
-          "$addr.base = $wb", []>;
 
 // Use vst1 to store a Q register as a D register pair.
 // This alternative to VSTMQ allows an alignment to be specified.
@@ -161,14 +147,9 @@ def VSTMQ_UPD
 def VST1q
   : NLdSt<0,0b00,0b1010,0b1100, (outs), (ins addrmode6:$addr, QPR:$src),
           IIC_VST, "vst1", "64", "${src:dregpair}, $addr", "", []>;
-def VST1q_UPD
-  : NLdSt<0,0b00,0b1010,0b1100, (outs GPR:$wb),
-          (ins addrmode6:$addr, am6offset:$offset, QPR:$src),
-          IIC_VST, "vst1", "64", "{$src:dregpair}, $addr$offset",
-          "$addr.addr = $wb", []>;
-} // mayStore = 1
+} // mayStore = 1, neverHasSideEffects = 1
 
-let mayLoad = 1, hasExtraDefRegAllocReq = 1 in {
+let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
 
 //   VLD1     : Vector Load (multiple single elements)
 class VLD1D<bits<4> op7_4, string Dt>
@@ -492,9 +473,9 @@ def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32">;
 //   VLD3DUP  : Vector Load (single 3-element structure to all lanes)
 //   VLD4DUP  : Vector Load (single 4-element structure to all lanes)
 //   FIXME: Not yet implemented.
-} // mayLoad = 1, hasExtraDefRegAllocReq = 1
+} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1
 
-let mayStore = 1, hasExtraSrcRegAllocReq = 1 in {
+let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
 
 //   VST1     : Vector Store (multiple single elements)
 class VST1D<bits<4> op7_4, string Dt>
@@ -807,7 +788,7 @@ def VST4LNd32_UPD : VST4LNWB<0b1011, {?,0,?,?}, "32">;
 def VST4LNq16_UPD : VST4LNWB<0b0111, {?,?,1,?}, "16">;
 def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32">;
 
-} // mayStore = 1, hasExtraSrcRegAllocReq = 1
+} // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1
 
 
 //===----------------------------------------------------------------------===//
@@ -815,27 +796,32 @@ def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32">;
 //===----------------------------------------------------------------------===//
 
 // Extract D sub-registers of Q registers.
-// (arm_dsubreg_0 is 5; arm_dsubreg_1 is 6)
 def DSubReg_i8_reg  : SDNodeXForm<imm, [{
-  return CurDAG->getTargetConstant(5 + N->getZExtValue() / 8, MVT::i32);
+  assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
+  return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/8, MVT::i32);
 }]>;
 def DSubReg_i16_reg : SDNodeXForm<imm, [{
-  return CurDAG->getTargetConstant(5 + N->getZExtValue() / 4, MVT::i32);
+  assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
+  return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/4, MVT::i32);
 }]>;
 def DSubReg_i32_reg : SDNodeXForm<imm, [{
-  return CurDAG->getTargetConstant(5 + N->getZExtValue() / 2, MVT::i32);
+  assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
+  return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/2, MVT::i32);
 }]>;
 def DSubReg_f64_reg : SDNodeXForm<imm, [{
-  return CurDAG->getTargetConstant(5 + N->getZExtValue(), MVT::i32);
+  assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
+  return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue(), MVT::i32);
 }]>;
 def DSubReg_f64_other_reg : SDNodeXForm<imm, [{
-  return CurDAG->getTargetConstant(5 + (1 - N->getZExtValue()), MVT::i32);
+  assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
+  return CurDAG->getTargetConstant(ARM::dsub_0 + (1 - N->getZExtValue()),
+                                   MVT::i32);
 }]>;
 
 // Extract S sub-registers of Q/D registers.
-// (arm_ssubreg_0 is 1; arm_ssubreg_1 is 2; etc.)
 def SSubReg_f32_reg : SDNodeXForm<imm, [{
-  return CurDAG->getTargetConstant(1 + N->getZExtValue(), MVT::i32);
+  assert(ARM::ssub_3 == ARM::ssub_0+3 && "Unexpected subreg numbering");
+  return CurDAG->getTargetConstant(ARM::ssub_0 + N->getZExtValue(), MVT::i32);
 }]>;
 
 // Translate lane numbers from Q registers to D subregs.
@@ -2829,11 +2815,21 @@ def  VSWPq    : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0,
 
 //   VMOV     : Vector Move (Register)
 
+let neverHasSideEffects = 1 in {
 def  VMOVDneon: N3VX<0, 0, 0b10, 0b0001, 0, 1, (outs DPR:$dst), (ins DPR:$src),
                      N3RegFrm, IIC_VMOVD, "vmov", "$dst, $src", "", []>;
 def  VMOVQ    : N3VX<0, 0, 0b10, 0b0001, 1, 1, (outs QPR:$dst), (ins QPR:$src),
                      N3RegFrm, IIC_VMOVD, "vmov", "$dst, $src", "", []>;
 
+// Pseudo vector move instructions for QQ and QQQQ registers. This should
+// be expanded after register allocation is completed.
+def  VMOVQQ   : PseudoInst<(outs QQPR:$dst), (ins QQPR:$src),
+                NoItinerary, "${:comment} vmov\t$dst, $src", []>;
+
+def  VMOVQQQQ : PseudoInst<(outs QQQQPR:$dst), (ins QQQQPR:$src),
+                NoItinerary, "${:comment} vmov\t$dst, $src", []>;
+} // neverHasSideEffects
+
 //   VMOV     : Vector Move (Immediate)
 
 // VMOV_get_imm8 xform function: convert build_vector to VMOV.i8 imm.
@@ -2871,6 +2867,7 @@ def vmovImm64 : PatLeaf<(build_vector), [{
 // Note: Some of the cmode bits in the following VMOV instructions need to
 // be encoded based on the immed values.
 
+let isReMaterializable = 1 in {
 def VMOVv8i8  : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$dst),
                          (ins h8imm:$SIMM), IIC_VMOVImm,
                          "vmov", "i8", "$dst, $SIMM", "",
@@ -2906,6 +2903,7 @@ def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$dst),
                          (ins h64imm:$SIMM), IIC_VMOVImm,
                          "vmov", "i64", "$dst, $SIMM", "",
                          [(set QPR:$dst, (v2i64 vmovImm64:$SIMM))]>;
+} // isReMaterializable
 
 //   VMOV     : Vector Get Lane (move scalar to ARM core register)
 
@@ -3018,11 +3016,11 @@ def : Pat<(v2f64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
           (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;
 
 def : Pat<(v2f32 (scalar_to_vector SPR:$src)),
-          (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$src, arm_ssubreg_0)>;
+          (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;
 def : Pat<(v2f64 (scalar_to_vector (f64 DPR:$src))),
-          (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), DPR:$src, arm_dsubreg_0)>;
+          (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
 def : Pat<(v4f32 (scalar_to_vector SPR:$src)),
-          (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), SPR:$src, arm_ssubreg_0)>;
+          (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;
 
 def : Pat<(v8i8 (scalar_to_vector GPR:$src)),
           (VSETLNi8  (v8i8  (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
@@ -3034,15 +3032,15 @@ def : Pat<(v2i32 (scalar_to_vector GPR:$src)),
 def : Pat<(v16i8 (scalar_to_vector GPR:$src)),
           (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                          (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
-                         arm_dsubreg_0)>;
+                         dsub_0)>;
 def : Pat<(v8i16 (scalar_to_vector GPR:$src)),
           (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
                          (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
-                         arm_dsubreg_0)>;
+                         dsub_0)>;
 def : Pat<(v4i32 (scalar_to_vector GPR:$src)),
           (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
                          (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
-                         arm_dsubreg_0)>;
+                         dsub_0)>;
 
 //   VDUP     : Vector Duplicate (from ARM core register to all elements)
 
@@ -3376,27 +3374,27 @@ def  VTBX4
 class N2VSPat<SDNode OpNode, ValueType ResTy, ValueType OpTy, NeonI Inst>
   : NEONFPPat<(ResTy (OpNode SPR:$a)),
               (EXTRACT_SUBREG (OpTy (Inst (INSERT_SUBREG (OpTy (IMPLICIT_DEF)),
-                                                       SPR:$a, arm_ssubreg_0))),
-                              arm_ssubreg_0)>;
+                                                       SPR:$a, ssub_0))),
+                              ssub_0)>;
 
 class N3VSPat<SDNode OpNode, NeonI Inst>
   : NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)),
               (EXTRACT_SUBREG (v2f32
                                  (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
-                                                      SPR:$a, arm_ssubreg_0),
+                                                      SPR:$a, ssub_0),
                                        (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
-                                                      SPR:$b, arm_ssubreg_0))),
-                              arm_ssubreg_0)>;
+                                                      SPR:$b, ssub_0))),
+                              ssub_0)>;
 
 class N3VSMulOpPat<SDNode MulNode, SDNode OpNode, NeonI Inst>
   : NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))),
               (EXTRACT_SUBREG (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
-                                                   SPR:$acc, arm_ssubreg_0),
+                                                   SPR:$acc, ssub_0),
                                     (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
-                                                   SPR:$a, arm_ssubreg_0),
+                                                   SPR:$a, ssub_0),
                                     (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
-                                                   SPR:$b, arm_ssubreg_0)),
-                              arm_ssubreg_0)>;
+                                                   SPR:$b, ssub_0)),
+                              ssub_0)>;
 
 // These need separate instructions because they must use DPR_VFP2 register
 // class which have SPR sub-registers.
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index e3ca536..40f924b 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -127,12 +127,12 @@ def t_addrmode_sp : Operand<i32>,
 let Defs = [SP], Uses = [SP], hasSideEffects = 1 in {
 def tADJCALLSTACKUP :
 PseudoInst<(outs), (ins i32imm:$amt1, i32imm:$amt2), NoItinerary,
-           "@ tADJCALLSTACKUP $amt1",
+           "${:comment} tADJCALLSTACKUP $amt1",
            [(ARMcallseq_end imm:$amt1, imm:$amt2)]>, Requires<[IsThumb1Only]>;
 
 def tADJCALLSTACKDOWN :
 PseudoInst<(outs), (ins i32imm:$amt), NoItinerary,
-           "@ tADJCALLSTACKDOWN $amt",
+           "${:comment} tADJCALLSTACKDOWN $amt",
            [(ARMcallseq_start imm:$amt)]>, Requires<[IsThumb1Only]>;
 }
 
@@ -254,14 +254,14 @@ def tADDspr : TIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr,
 // Pseudo instruction that will expand into a tSUBspi + a copy.
 let usesCustomInserter = 1 in { // Expanded after instruction selection.
 def tSUBspi_ : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, t_imm_s4:$rhs),
-               NoItinerary, "@ sub\t$dst, $rhs", []>;
+               NoItinerary, "${:comment} sub\t$dst, $rhs", []>;
 
 def tADDspr_ : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs),
-               NoItinerary, "@ add\t$dst, $rhs", []>;
+               NoItinerary, "${:comment} add\t$dst, $rhs", []>;
 
 let Defs = [CPSR] in
 def tANDsp : PseudoInst<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
-             NoItinerary, "@ and\t$dst, $rhs", []>;
+             NoItinerary, "${:comment} and\t$dst, $rhs", []>;
 } // usesCustomInserter
 
 //===----------------------------------------------------------------------===//
@@ -374,7 +374,7 @@ let isBranch = 1, isTerminator = 1 in {
   // Far jump
   let Defs = [LR] in
   def tBfar : TIx2<0b11110, 0b11, 1, (outs), (ins brtarget:$target), IIC_Br,
-                    "bl\t$target\t@ far jump",[]>;
+                    "bl\t$target\t${:comment} far jump",[]>;
 
   def tBR_JTr : T1JTI<(outs),
                       (ins tGPR:$target, jtblock_operand:$jt, i32imm:$id),
@@ -417,9 +417,13 @@ def tSVC : T1pI<(outs), (ins i32imm:$svc), IIC_Br, "svc", "\t$svc", []>,
 }
 }
 
-// A8.6.16 B: Encoding T1 -- for disassembly only
+// A8.6.16 B: Encoding T1
 // If Inst{11-8} == 0b1110 then UNDEFINED
-def tTRAP : T1I<(outs), (ins), IIC_Br, "trap", []>, Encoding16 {
+// FIXME: Temporary emitted as raw bytes until this pseudo-op will be added to
+// binutils
+let isBarrier = 1, isTerminator = 1 in
+def tTRAP : TI<(outs), (ins), IIC_Br, 
+               ".short 0xdefe ${:comment} trap", [(trap)]>, Encoding16 {
   let Inst{15-12} = 0b1101;
   let Inst{11-8} = 0b1110;
 }
@@ -476,7 +480,7 @@ def tLDRspi : T1pIs<(outs tGPR:$dst), (ins t_addrmode_sp:$addr), IIC_iLoadi,
 
 // Special instruction for restore. It cannot clobber condition register
 // when it's expanded by eliminateCallFramePseudoInstr().
-let canFoldAsLoad = 1, mayLoad = 1 in
+let canFoldAsLoad = 1, mayLoad = 1, neverHasSideEffects = 1 in
 def tRestore : T1pIs<(outs tGPR:$dst), (ins t_addrmode_sp:$addr), IIC_iLoadi,
                     "ldr", "\t$dst, $addr", []>,
                T1LdStSP<{1,?,?}>;
@@ -490,7 +494,8 @@ def tLDRpci : T1pIs<(outs tGPR:$dst), (ins i32imm:$addr), IIC_iLoadi,
               T1Encoding<{0,1,0,0,1,?}>; // A6.2 & A8.6.59
 
 // Special LDR for loads from non-pc-relative constpools.
-let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1 in
+let canFoldAsLoad = 1, mayLoad = 1, neverHasSideEffects = 1,
+    isReMaterializable = 1 in
 def tLDRcp  : T1pIs<(outs tGPR:$dst), (ins i32imm:$addr), IIC_iLoadi,
                   "ldr", "\t$dst, $addr", []>,
               T1LdStSP<{1,?,?}>;
@@ -527,7 +532,7 @@ def tSTRspi : T1pIs<(outs), (ins tGPR:$src, t_addrmode_sp:$addr), IIC_iStorei,
                    [(store tGPR:$src, t_addrmode_sp:$addr)]>,
               T1LdStSP<{0,?,?}>;
 
-let mayStore = 1 in {
+let mayStore = 1, neverHasSideEffects = 1 in {
 // Special instruction for spill. It cannot clobber condition register
 // when it's expanded by eliminateCallFramePseudoInstr().
 def tSpill : T1pIs<(outs), (ins tGPR:$src, t_addrmode_sp:$addr), IIC_iStorei,
@@ -540,7 +545,7 @@ def tSpill : T1pIs<(outs), (ins tGPR:$src, t_addrmode_sp:$addr), IIC_iStorei,
 //
 
 // These requires base address to be written back or one of the loaded regs.
-let mayLoad = 1, hasExtraDefRegAllocReq = 1 in {
+let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
 def tLDM : T1I<(outs),
                (ins addrmode4:$addr, pred:$p, reglist:$dsts, variable_ops),
                IIC_iLoadm,
@@ -553,9 +558,9 @@ def tLDM_UPD : T1It<(outs tGPR:$wb),
                     "ldm${addr:submode}${p}\t$addr!, $dsts",
                     "$addr.addr = $wb", []>,
                T1Encoding<{1,1,0,0,1,?}>; // A6.2 & A8.6.53
-} // mayLoad, hasExtraDefRegAllocReq
+} // mayLoad, neverHasSideEffects = 1, hasExtraDefRegAllocReq
 
-let mayStore = 1, hasExtraSrcRegAllocReq = 1 in
+let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in
 def tSTM_UPD : T1It<(outs tGPR:$wb),
                     (ins addrmode4:$addr, pred:$p, reglist:$srcs, variable_ops),
                     IIC_iStorem,
@@ -866,11 +871,12 @@ def tUXTH  : T1pI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iUNAr,
 let usesCustomInserter = 1 in  // Expanded after instruction selection.
   def tMOVCCr_pseudo :
   PseudoInst<(outs tGPR:$dst), (ins tGPR:$false, tGPR:$true, pred:$cc),
-              NoItinerary, "@ tMOVCCr $cc",
+              NoItinerary, "${:comment} tMOVCCr $cc",
              [/*(set tGPR:$dst, (ARMcmov tGPR:$false, tGPR:$true, imm:$cc))*/]>;
 
 
 // 16-bit movcc in IT blocks for Thumb2.
+let neverHasSideEffects = 1 in {
 def tMOVCCr : T1pIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iCMOVr,
                     "mov", "\t$dst, $rhs", []>,
               T1Special<{1,0,?,?}>;
@@ -878,9 +884,12 @@ def tMOVCCr : T1pIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iCMOVr,
 def tMOVCCi : T1pIt<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), IIC_iCMOVi,
                     "mov", "\t$dst, $rhs", []>,
               T1General<{1,0,0,?,?}>;
+} // neverHasSideEffects
 
 // tLEApcrel - Load a pc-relative address into a register without offending the
 // assembler.
+let neverHasSideEffects = 1 in {
+let isReMaterializable = 1 in
 def tLEApcrel : T1I<(outs tGPR:$dst), (ins i32imm:$label, pred:$p), IIC_iALUi,
                     "adr$p\t$dst, #$label", []>,
                 T1Encoding<{1,0,1,0,0,?}>; // A6.2 & A8.6.10
@@ -889,6 +898,7 @@ def tLEApcrelJT : T1I<(outs tGPR:$dst),
                       (ins i32imm:$label, nohash_imm:$id, pred:$p),
                       IIC_iALUi, "adr$p\t$dst, #${label}_${id}", []>,
                   T1Encoding<{1,0,1,0,0,?}>; // A6.2 & A8.6.10
+} // neverHasSideEffects
 
 //===----------------------------------------------------------------------===//
 // TLS Instructions
@@ -918,16 +928,32 @@ let Defs =
   [ R0,  R1,  R2,  R3,  R4,  R5,  R6,  R7, R12 ] in {
   def tInt_eh_sjlj_setjmp : ThumbXI<(outs),(ins tGPR:$src, tGPR:$val),
                               AddrModeNone, SizeSpecial, NoItinerary,
-                              "str\t$val, [$src, #8]\t@ begin eh.setjmp\n"
+                              "str\t$val, [$src, #8]\t${:comment} begin eh.setjmp\n"
                               "\tmov\t$val, pc\n"
-                              "\tadds\t$val, #9\n"
+                              "\tadds\t$val, #7\n"
                               "\tstr\t$val, [$src, #4]\n"
                               "\tmovs\tr0, #0\n"
                               "\tb\t1f\n"
-                              "\tmovs\tr0, #1\t@ end eh.setjmp\n"
+                              "\tmovs\tr0, #1\t${:comment} end eh.setjmp\n"
                               "1:", "",
                    [(set R0, (ARMeh_sjlj_setjmp tGPR:$src, tGPR:$val))]>;
 }
+
+// FIXME: Non-Darwin version(s)
+let isBarrier = 1, hasSideEffects = 1, isTerminator = 1,
+    Defs = [ R7, LR, SP ] in {
+def tInt_eh_sjlj_longjmp : XI<(outs), (ins GPR:$src, GPR:$scratch),
+                             AddrModeNone, SizeSpecial, IndexModeNone,
+                             Pseudo, NoItinerary,
+                             "ldr\t$scratch, [$src, #8]\n\t"
+                             "mov\tsp, $scratch\n\t"
+                             "ldr\t$scratch, [$src, #4]\n\t"
+                             "ldr\tr7, [$src]\n\t"
+                             "bx\t$scratch", "",
+                         [(ARMeh_sjlj_longjmp GPR:$src, GPR:$scratch)]>,
+                                Requires<[IsThumb, IsDarwin]>;
+}
+
 //===----------------------------------------------------------------------===//
 // Non-Instruction Patterns
 //
@@ -1011,7 +1037,7 @@ def : T1Pat<(i32 imm0_255_comp:$src),
 // scheduling.
 let isReMaterializable = 1 in
 def tLDRpci_pic : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr, pclabel:$cp),
-                   NoItinerary, "@ ldr.n\t$dst, $addr\n$cp:\n\tadd\t$dst, pc",
+                   NoItinerary, "${:comment} ldr.n\t$dst, $addr\n$cp:\n\tadd\t$dst, pc",
                [(set GPR:$dst, (ARMpic_add (load (ARMWrapper tconstpool:$addr)),
                                            imm:$cp))]>,
                Requires<[IsThumb1Only]>;
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index 742bd40..b91c089 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -185,8 +185,8 @@ multiclass T2I_un_irs<bits<4> opcod, string opc, PatFrag opnode,
      let Inst{15} = 0;
    }
    // register
-   def r : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVr,
-               opc, ".w\t$dst, $src",
+   def r : T2sI<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVr,
+                opc, ".w\t$dst, $src",
                 [(set GPR:$dst, (opnode GPR:$src))]> {
      let Inst{31-27} = 0b11101;
      let Inst{26-25} = 0b01;
@@ -198,9 +198,9 @@ multiclass T2I_un_irs<bits<4> opcod, string opc, PatFrag opnode,
      let Inst{5-4} = 0b00; // type
    }
    // shifted register
-   def s : T2I<(outs GPR:$dst), (ins t2_so_reg:$src), IIC_iMOVsi,
-               opc, ".w\t$dst, $src",
-               [(set GPR:$dst, (opnode t2_so_reg:$src))]> {
+   def s : T2sI<(outs GPR:$dst), (ins t2_so_reg:$src), IIC_iMOVsi,
+                opc, ".w\t$dst, $src",
+                [(set GPR:$dst, (opnode t2_so_reg:$src))]> {
      let Inst{31-27} = 0b11101;
      let Inst{26-25} = 0b01;
      let Inst{24-21} = opcod;
@@ -210,7 +210,7 @@ multiclass T2I_un_irs<bits<4> opcod, string opc, PatFrag opnode,
 }
 
 /// T2I_bin_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns for a
-//  binary operation that produces a value. These are predicable and can be
+/// binary operation that produces a value. These are predicable and can be
 /// changed to modify CPSR.
 multiclass T2I_bin_irs<bits<4> opcod, string opc, PatFrag opnode,
                        bit Commutable = 0, string wide =""> {
@@ -259,23 +259,23 @@ multiclass T2I_bin_w_irs<bits<4> opcod, string opc, PatFrag opnode,
 /// T2I_bin_irs counterpart.
 multiclass T2I_rbin_is<bits<4> opcod, string opc, PatFrag opnode> {
    // shifted imm
-   def ri : T2I<(outs GPR:$dst), (ins GPR:$rhs, t2_so_imm:$lhs), IIC_iALUi,
-                opc, ".w\t$dst, $rhs, $lhs",
-                [(set GPR:$dst, (opnode t2_so_imm:$lhs, GPR:$rhs))]> {
+   def ri : T2sI<(outs GPR:$dst), (ins GPR:$rhs, t2_so_imm:$lhs), IIC_iALUi,
+                 opc, ".w\t$dst, $rhs, $lhs",
+                 [(set GPR:$dst, (opnode t2_so_imm:$lhs, GPR:$rhs))]> {
      let Inst{31-27} = 0b11110;
      let Inst{25} = 0;
      let Inst{24-21} = opcod;
-     let Inst{20} = 0; // The S bit.
+     let Inst{20} = ?; // The S bit.
      let Inst{15} = 0;
    }
    // shifted register
-   def rs : T2I<(outs GPR:$dst), (ins GPR:$rhs, t2_so_reg:$lhs), IIC_iALUsi,
-                opc, "\t$dst, $rhs, $lhs",
-                [(set GPR:$dst, (opnode t2_so_reg:$lhs, GPR:$rhs))]> {
+   def rs : T2sI<(outs GPR:$dst), (ins GPR:$rhs, t2_so_reg:$lhs), IIC_iALUsi,
+                 opc, "\t$dst, $rhs, $lhs",
+                 [(set GPR:$dst, (opnode t2_so_reg:$lhs, GPR:$rhs))]> {
      let Inst{31-27} = 0b11101;
      let Inst{26-25} = 0b01;
      let Inst{24-21} = opcod;
-     let Inst{20} = 0; // The S bit.
+     let Inst{20} = ?; // The S bit.
    }
 }
 
@@ -461,10 +461,9 @@ multiclass T2I_adde_sube_s_irs<bits<4> opcod, string opc, PatFrag opnode,
 let Defs = [CPSR] in {
 multiclass T2I_rbin_s_is<bits<4> opcod, string opc, PatFrag opnode> {
    // shifted imm
-   def ri : T2XI<(outs GPR:$dst), (ins GPR:$rhs, t2_so_imm:$lhs, cc_out:$s),
-                 IIC_iALUi,
-                 !strconcat(opc, "${s}.w\t$dst, $rhs, $lhs"),
-                 [(set GPR:$dst, (opnode t2_so_imm:$lhs, GPR:$rhs))]> {
+   def ri : T2I<(outs GPR:$dst), (ins GPR:$rhs, t2_so_imm:$lhs), IIC_iALUi,
+                !strconcat(opc, "s"), ".w\t$dst, $rhs, $lhs",
+                [(set GPR:$dst, (opnode t2_so_imm:$lhs, GPR:$rhs))]> {
      let Inst{31-27} = 0b11110;
      let Inst{25} = 0;
      let Inst{24-21} = opcod;
@@ -472,10 +471,9 @@ multiclass T2I_rbin_s_is<bits<4> opcod, string opc, PatFrag opnode> {
      let Inst{15} = 0;
    }
    // shifted register
-   def rs : T2XI<(outs GPR:$dst), (ins GPR:$rhs, t2_so_reg:$lhs, cc_out:$s),
-                 IIC_iALUsi,
-                 !strconcat(opc, "${s}\t$dst, $rhs, $lhs"),
-                 [(set GPR:$dst, (opnode t2_so_reg:$lhs, GPR:$rhs))]> {
+   def rs : T2I<(outs GPR:$dst), (ins GPR:$rhs, t2_so_reg:$lhs), IIC_iALUsi,
+                !strconcat(opc, "s"), "\t$dst, $rhs, $lhs",
+                [(set GPR:$dst, (opnode t2_so_reg:$lhs, GPR:$rhs))]> {
      let Inst{31-27} = 0b11101;
      let Inst{26-25} = 0b01;
      let Inst{24-21} = opcod;
@@ -639,7 +637,8 @@ multiclass T2I_st<bits<2> opcod, string opc, PatFrag opnode> {
 multiclass T2I_unary_rrot<bits<3> opcod, string opc, PatFrag opnode> {
   def r     : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
                   opc, ".w\t$dst, $src",
-                 [(set GPR:$dst, (opnode GPR:$src))]> {
+                 [(set GPR:$dst, (opnode GPR:$src))]>,
+                 Requires<[HasT2ExtractPack]> {
      let Inst{31-27} = 0b11111;
      let Inst{26-23} = 0b0100;
      let Inst{22-20} = opcod;
@@ -650,7 +649,8 @@ multiclass T2I_unary_rrot<bits<3> opcod, string opc, PatFrag opnode> {
    }
   def r_rot : T2I<(outs GPR:$dst), (ins GPR:$src, i32imm:$rot), IIC_iUNAsi,
                   opc, ".w\t$dst, $src, ror $rot",
-                 [(set GPR:$dst, (opnode (rotr GPR:$src, rot_imm:$rot)))]> {
+                 [(set GPR:$dst, (opnode (rotr GPR:$src, rot_imm:$rot)))]>,
+                 Requires<[HasT2ExtractPack]> {
      let Inst{31-27} = 0b11111;
      let Inst{26-23} = 0b0100;
      let Inst{22-20} = opcod;
@@ -665,7 +665,8 @@ multiclass T2I_unary_rrot<bits<3> opcod, string opc, PatFrag opnode> {
 multiclass T2I_unary_rrot_nw<bits<3> opcod, string opc, PatFrag opnode> {
   def r     : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
                   opc, "\t$dst, $src",
-                 [(set GPR:$dst, (opnode GPR:$src))]> {
+                 [(set GPR:$dst, (opnode GPR:$src))]>,
+                 Requires<[HasT2ExtractPack]> {
      let Inst{31-27} = 0b11111;
      let Inst{26-23} = 0b0100;
      let Inst{22-20} = opcod;
@@ -676,7 +677,8 @@ multiclass T2I_unary_rrot_nw<bits<3> opcod, string opc, PatFrag opnode> {
    }
   def r_rot : T2I<(outs GPR:$dst), (ins GPR:$src, i32imm:$rot), IIC_iUNAsi,
                   opc, "\t$dst, $src, ror $rot",
-                 [(set GPR:$dst, (opnode (rotr GPR:$src, rot_imm:$rot)))]> {
+                 [(set GPR:$dst, (opnode (rotr GPR:$src, rot_imm:$rot)))]>,
+                 Requires<[HasT2ExtractPack]> {
      let Inst{31-27} = 0b11111;
      let Inst{26-23} = 0b0100;
      let Inst{22-20} = opcod;
@@ -717,7 +719,8 @@ multiclass T2I_unary_rrot_DO<bits<3> opcod, string opc> {
 multiclass T2I_bin_rrot<bits<3> opcod, string opc, PatFrag opnode> {
   def rr     : T2I<(outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS), IIC_iALUr,
                   opc, "\t$dst, $LHS, $RHS",
-                  [(set GPR:$dst, (opnode GPR:$LHS, GPR:$RHS))]> {
+                  [(set GPR:$dst, (opnode GPR:$LHS, GPR:$RHS))]>,
+                  Requires<[HasT2ExtractPack]> {
      let Inst{31-27} = 0b11111;
      let Inst{26-23} = 0b0100;
      let Inst{22-20} = opcod;
@@ -728,7 +731,8 @@ multiclass T2I_bin_rrot<bits<3> opcod, string opc, PatFrag opnode> {
   def rr_rot : T2I<(outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS, i32imm:$rot),
                   IIC_iALUsr, opc, "\t$dst, $LHS, $RHS, ror $rot",
                   [(set GPR:$dst, (opnode GPR:$LHS,
-                                          (rotr GPR:$RHS, rot_imm:$rot)))]> {
+                                          (rotr GPR:$RHS, rot_imm:$rot)))]>,
+                  Requires<[HasT2ExtractPack]> {
      let Inst{31-27} = 0b11111;
      let Inst{26-23} = 0b0100;
      let Inst{22-20} = opcod;
@@ -771,6 +775,8 @@ multiclass T2I_bin_rrot_DO<bits<3> opcod, string opc> {
 
 // LEApcrel - Load a pc-relative address into a register without offending the
 // assembler.
+let neverHasSideEffects = 1 in {
+let isReMaterializable = 1 in
 def t2LEApcrel : T2XI<(outs GPR:$dst), (ins i32imm:$label, pred:$p), IIC_iALUi,
                       "adr$p.w\t$dst, #$label", []> {
   let Inst{31-27} = 0b11110;
@@ -792,6 +798,7 @@ def t2LEApcrelJT : T2XI<(outs GPR:$dst),
   let Inst{19-16} = 0b1111; // Rn
   let Inst{15} = 0;
 }
+} // neverHasSideEffects
 
 // ADD r, sp, {so_imm|i12}
 def t2ADDrSPi   : T2sI<(outs GPR:$dst), (ins GPR:$sp, t2_so_imm:$imm),
@@ -856,9 +863,11 @@ def t2SUBrSPs   : T2sI<(outs GPR:$dst), (ins GPR:$sp, t2_so_reg:$rhs),
   let Inst{15} = 0;
 }
 
-// Signed and unsigned division, for disassembly only
+// Signed and unsigned division on v7-M
 def t2SDIV : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALUi, 
-                 "sdiv", "\t$dst, $a, $b", []> {
+                 "sdiv", "\t$dst, $a, $b",
+                 [(set GPR:$dst, (sdiv GPR:$a, GPR:$b))]>,
+                 Requires<[HasDivide]> {
   let Inst{31-27} = 0b11111;
   let Inst{26-21} = 0b011100;
   let Inst{20} = 0b1;
@@ -867,7 +876,9 @@ def t2SDIV : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALUi,
 }
 
 def t2UDIV : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALUi, 
-                 "udiv", "\t$dst, $a, $b", []> {
+                 "udiv", "\t$dst, $a, $b",
+                 [(set GPR:$dst, (udiv GPR:$a, GPR:$b))]>,
+                 Requires<[HasDivide]> {
   let Inst{31-27} = 0b11111;
   let Inst{26-21} = 0b011101;
   let Inst{20} = 0b1;
@@ -878,11 +889,11 @@ def t2UDIV : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALUi,
 // Pseudo instruction that will expand into a t2SUBrSPi + a copy.
 let usesCustomInserter = 1 in { // Expanded after instruction selection.
 def t2SUBrSPi_   : PseudoInst<(outs GPR:$dst), (ins GPR:$sp, t2_so_imm:$imm),
-                   NoItinerary, "@ sub.w\t$dst, $sp, $imm", []>;
+                   NoItinerary, "${:comment} sub.w\t$dst, $sp, $imm", []>;
 def t2SUBrSPi12_ : PseudoInst<(outs GPR:$dst), (ins GPR:$sp, imm0_4095:$imm),
-                   NoItinerary, "@ subw\t$dst, $sp, $imm", []>;
+                   NoItinerary, "${:comment} subw\t$dst, $sp, $imm", []>;
 def t2SUBrSPs_   : PseudoInst<(outs GPR:$dst), (ins GPR:$sp, t2_so_reg:$rhs),
-                   NoItinerary, "@ sub\t$dst, $sp, $rhs", []>;
+                   NoItinerary, "${:comment} sub\t$dst, $sp, $rhs", []>;
 } // usesCustomInserter
 
 
@@ -902,7 +913,7 @@ defm t2LDRB  : T2I_ld<0, 0b00, "ldrb", UnOpFrag<(zextloadi8  node:$Src)>>;
 defm t2LDRSH : T2I_ld<1, 0b01, "ldrsh", UnOpFrag<(sextloadi16 node:$Src)>>;
 defm t2LDRSB : T2I_ld<1, 0b00, "ldrsb", UnOpFrag<(sextloadi8  node:$Src)>>;
 
-let mayLoad = 1, hasExtraDefRegAllocReq = 1 in {
+let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
 // Load doubleword
 def t2LDRDi8  : T2Ii8s4<1, 0, 1, (outs GPR:$dst1, GPR:$dst2),
                         (ins t2addrmode_imm8s4:$addr),
@@ -912,7 +923,7 @@ def t2LDRDpci : T2Ii8s4<1, 0, 1, (outs GPR:$dst1, GPR:$dst2),
                        "ldrd", "\t$dst1, $addr", []> {
   let Inst{19-16} = 0b1111; // Rn
 }
-}
+} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1
 
 // zextload i1 -> zextload i8
 def : T2Pat<(zextloadi1 t2addrmode_imm12:$addr),
@@ -955,7 +966,7 @@ def : T2Pat<(extloadi16 (ARMWrapper tconstpool:$addr)),
             (t2LDRHpci  tconstpool:$addr)>;
 
 // Indexed loads
-let mayLoad = 1 in {
+let mayLoad = 1, neverHasSideEffects = 1 in {
 def t2LDR_PRE  : T2Iidxldst<0, 0b10, 1, 1, (outs GPR:$dst, GPR:$base_wb),
                             (ins t2addrmode_imm8:$addr),
                             AddrModeT2_i8, IndexModePre, IIC_iLoadiu,
@@ -1011,7 +1022,7 @@ def t2LDRSH_POST : T2Iidxldst<1, 0b01, 1, 0, (outs GPR:$dst, GPR:$base_wb),
                             AddrModeT2_i8, IndexModePost, IIC_iLoadiu,
                         "ldrsh", "\t$dst, [$base], $offset", "$base = $base_wb",
                             []>;
-}
+} // mayLoad = 1, neverHasSideEffects = 1 
 
 // LDRT, LDRBT, LDRHT, LDRSBT, LDRSHT all have offset mode (PUW=0b110) and are
 // for disassembly only.
@@ -1041,7 +1052,7 @@ defm t2STRB:T2I_st<0b00,"strb",BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>;
 defm t2STRH:T2I_st<0b01,"strh",BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>;
 
 // Store doubleword
-let mayLoad = 1, hasExtraSrcRegAllocReq = 1 in
+let mayLoad = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in
 def t2STRDi8 : T2Ii8s4<1, 0, 0, (outs),
                        (ins GPR:$src1, GPR:$src2, t2addrmode_imm8s4:$addr),
                IIC_iStorer, "strd", "\t$src1, $addr", []>;
@@ -1204,7 +1215,7 @@ defm t2PLI  : T2Ipl<1, 0, "pli">;
 //  Load / store multiple Instructions.
 //
 
-let mayLoad = 1, hasExtraDefRegAllocReq = 1 in {
+let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
 def t2LDM : T2XI<(outs), (ins addrmode4:$addr, pred:$p,
                           reglist:$dsts, variable_ops), IIC_iLoadm,
                  "ldm${addr:submode}${p}${addr:wide}\t$addr, $dsts", []> {
@@ -1227,9 +1238,9 @@ def t2LDM_UPD : T2XIt<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
   let Inst{21} = 1; // The W bit.
   let Inst{20} = 1; // Load
 }
-} // mayLoad, hasExtraDefRegAllocReq
+} // mayLoad, neverHasSideEffects, hasExtraDefRegAllocReq
 
-let mayStore = 1, hasExtraSrcRegAllocReq = 1 in {
+let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
 def t2STM : T2XI<(outs), (ins addrmode4:$addr, pred:$p,
                           reglist:$srcs, variable_ops), IIC_iStorem,
                  "stm${addr:submode}${p}${addr:wide}\t$addr, $srcs", []> {
@@ -1253,7 +1264,7 @@ def t2STM_UPD : T2XIt<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
   let Inst{21} = 1; // The W bit.
   let Inst{20} = 0; // Store
 }
-} // mayStore, hasExtraSrcRegAllocReq
+} // mayStore, neverHasSideEffects, hasExtraSrcRegAllocReq
 
 //===----------------------------------------------------------------------===//
 //  Move Instructions.
@@ -1564,9 +1575,9 @@ def t2MOVrx : T2sI<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
 }
 
 let Defs = [CPSR] in {
-def t2MOVsrl_flag : T2XI<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
-                         "lsrs.w\t$dst, $src, #1",
-                         [(set GPR:$dst, (ARMsrl_flag GPR:$src))]> {
+def t2MOVsrl_flag : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
+                        "lsrs", ".w\t$dst, $src, #1",
+                        [(set GPR:$dst, (ARMsrl_flag GPR:$src))]> {
   let Inst{31-27} = 0b11101;
   let Inst{26-25} = 0b01;
   let Inst{24-21} = 0b0010;
@@ -1577,9 +1588,9 @@ def t2MOVsrl_flag : T2XI<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
   let Inst{14-12} = 0b000;
   let Inst{7-6} = 0b01;
 }
-def t2MOVsra_flag : T2XI<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
-                         "asrs.w\t$dst, $src, #1",
-                         [(set GPR:$dst, (ARMsra_flag GPR:$src))]> {
+def t2MOVsra_flag : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
+                        "asrs", ".w\t$dst, $src, #1",
+                        [(set GPR:$dst, (ARMsra_flag GPR:$src))]> {
   let Inst{31-27} = 0b11101;
   let Inst{26-25} = 0b01;
   let Inst{24-21} = 0b0010;
@@ -2058,7 +2069,8 @@ def t2PKHBT : T2I<(outs GPR:$dst), (ins GPR:$src1, GPR:$src2, i32imm:$shamt),
                   IIC_iALUsi, "pkhbt", "\t$dst, $src1, $src2, lsl $shamt",
                   [(set GPR:$dst, (or (and GPR:$src1, 0xFFFF),
                                       (and (shl GPR:$src2, (i32 imm:$shamt)),
-                                           0xFFFF0000)))]> {
+                                           0xFFFF0000)))]>,
+                  Requires<[HasT2ExtractPack]> {
   let Inst{31-27} = 0b11101;
   let Inst{26-25} = 0b01;
   let Inst{24-20} = 0b01100;
@@ -2068,15 +2080,18 @@ def t2PKHBT : T2I<(outs GPR:$dst), (ins GPR:$src1, GPR:$src2, i32imm:$shamt),
 
 // Alternate cases for PKHBT where identities eliminate some nodes.
 def : T2Pat<(or (and GPR:$src1, 0xFFFF), (and GPR:$src2, 0xFFFF0000)),
-            (t2PKHBT GPR:$src1, GPR:$src2, 0)>;
+            (t2PKHBT GPR:$src1, GPR:$src2, 0)>,
+            Requires<[HasT2ExtractPack]>;
 def : T2Pat<(or (and GPR:$src1, 0xFFFF), (shl GPR:$src2, imm16_31:$shamt)),
-            (t2PKHBT GPR:$src1, GPR:$src2, imm16_31:$shamt)>;
+            (t2PKHBT GPR:$src1, GPR:$src2, imm16_31:$shamt)>,
+            Requires<[HasT2ExtractPack]>;
 
 def t2PKHTB : T2I<(outs GPR:$dst), (ins GPR:$src1, GPR:$src2, i32imm:$shamt),
                   IIC_iALUsi, "pkhtb", "\t$dst, $src1, $src2, asr $shamt",
                   [(set GPR:$dst, (or (and GPR:$src1, 0xFFFF0000),
                                       (and (sra GPR:$src2, imm16_31:$shamt),
-                                           0xFFFF)))]> {
+                                           0xFFFF)))]>,
+                  Requires<[HasT2ExtractPack]> {
   let Inst{31-27} = 0b11101;
   let Inst{26-25} = 0b01;
   let Inst{24-20} = 0b01100;
@@ -2087,10 +2102,12 @@ def t2PKHTB : T2I<(outs GPR:$dst), (ins GPR:$src1, GPR:$src2, i32imm:$shamt),
 // Alternate cases for PKHTB where identities eliminate some nodes.  Note that
 // a shift amount of 0 is *not legal* here, it is PKHBT instead.
 def : T2Pat<(or (and GPR:$src1, 0xFFFF0000), (srl GPR:$src2, (i32 16))),
-            (t2PKHTB GPR:$src1, GPR:$src2, 16)>;
+            (t2PKHTB GPR:$src1, GPR:$src2, 16)>,
+            Requires<[HasT2ExtractPack]>;
 def : T2Pat<(or (and GPR:$src1, 0xFFFF0000),
                      (and (srl GPR:$src2, imm1_15:$shamt), 0xFFFF)),
-            (t2PKHTB GPR:$src1, GPR:$src2, imm1_15:$shamt)>;
+            (t2PKHTB GPR:$src1, GPR:$src2, imm1_15:$shamt)>,
+            Requires<[HasT2ExtractPack]>;
 
 //===----------------------------------------------------------------------===//
 //  Comparison Instructions...
@@ -2127,6 +2144,7 @@ defm t2TEQ  : T2I_cmp_irs<0b0100, "teq",
 // Conditional moves
 // FIXME: should be able to write a pattern for ARMcmov, but can't use
 // a two-value operand where a dag node expects two operands. :(
+let neverHasSideEffects = 1 in {
 def t2MOVCCr : T2I<(outs GPR:$dst), (ins GPR:$false, GPR:$true), IIC_iCMOVr,
                    "mov", ".w\t$dst, $true",
       [/*(set GPR:$dst, (ARMcmov GPR:$false, GPR:$true, imm:$cc, CCR:$ccr))*/]>,
@@ -2178,6 +2196,7 @@ def t2MOVCCror : T2I_movcc_sh<0b11, (outs GPR:$dst),
                              (ins GPR:$false, GPR:$true, i32imm:$rhs),
                              IIC_iCMOVsi, "ror", ".w\t$dst, $true, $rhs", []>,
                  RegConstraint<"$false = $dst">;
+} // neverHasSideEffects
 
 //===----------------------------------------------------------------------===//
 // Atomic operations intrinsics
@@ -2378,13 +2397,13 @@ let Defs =
     D31 ] in {
   def t2Int_eh_sjlj_setjmp : Thumb2XI<(outs), (ins GPR:$src, tGPR:$val),
                                AddrModeNone, SizeSpecial, NoItinerary,
-                               "str\t$val, [$src, #8]\t@ begin eh.setjmp\n"
+                               "str\t$val, [$src, #8]\t${:comment} begin eh.setjmp\n"
                                "\tmov\t$val, pc\n"
-                               "\tadds\t$val, #9\n"
+                               "\tadds\t$val, #7\n"
                                "\tstr\t$val, [$src, #4]\n"
                                "\tmovs\tr0, #0\n"
                                "\tb\t1f\n"
-                               "\tmovs\tr0, #1\t@ end eh.setjmp\n"
+                               "\tmovs\tr0, #1\t${:comment} end eh.setjmp\n"
                                "1:", "",
                           [(set R0, (ARMeh_sjlj_setjmp GPR:$src, tGPR:$val))]>,
                              Requires<[IsThumb2, HasVFP2]>;
@@ -2394,13 +2413,13 @@ let Defs =
   [ R0,  R1,  R2,  R3,  R4,  R5,  R6,  R7,  R8,  R9,  R10, R11, R12, LR ] in {
   def t2Int_eh_sjlj_setjmp_nofp : Thumb2XI<(outs), (ins GPR:$src, tGPR:$val),
                                AddrModeNone, SizeSpecial, NoItinerary,
-                               "str\t$val, [$src, #8]\t@ begin eh.setjmp\n"
+                               "str\t$val, [$src, #8]\t${:comment} begin eh.setjmp\n"
                                "\tmov\t$val, pc\n"
-                               "\tadds\t$val, #9\n"
+                               "\tadds\t$val, #7\n"
                                "\tstr\t$val, [$src, #4]\n"
                                "\tmovs\tr0, #0\n"
                                "\tb\t1f\n"
-                               "\tmovs\tr0, #1\t@ end eh.setjmp\n"
+                               "\tmovs\tr0, #1\t${:comment} end eh.setjmp\n"
                                "1:", "",
                           [(set R0, (ARMeh_sjlj_setjmp GPR:$src, tGPR:$val))]>,
                                   Requires<[IsThumb2, NoVFP]>;
@@ -2672,7 +2691,7 @@ def : T2Pat<(ARMWrapperJT tjumptable:$dst, imm:$id),
 // scheduling.
 let canFoldAsLoad = 1, isReMaterializable = 1 in
 def t2LDRpci_pic : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr, pclabel:$cp),
-                   NoItinerary, "@ ldr.w\t$dst, $addr\n$cp:\n\tadd\t$dst, pc",
+                   NoItinerary, "${:comment} ldr.w\t$dst, $addr\n$cp:\n\tadd\t$dst, pc",
                [(set GPR:$dst, (ARMpic_add (load (ARMWrapper tconstpool:$addr)),
                                            imm:$cp))]>,
                Requires<[IsThumb2]>;
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index 36fcaa1..54474cf 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -76,7 +76,7 @@ def VSTRS  : ASI5<0b1101, 0b00, (outs), (ins SPR:$src, addrmode5:$addr),
 //  Load / store multiple Instructions.
 //
 
-let mayLoad = 1, hasExtraDefRegAllocReq = 1 in {
+let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
 def VLDMD : AXDI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$dsts,
                            variable_ops), IndexModeNone, IIC_fpLoadm,
                   "vldm${addr:submode}${p}\t${addr:base}, $dsts", "", []> {
@@ -104,9 +104,9 @@ def VLDMS_UPD : AXSI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p,
                       "$addr.base = $wb", []> {
   let Inst{20} = 1;
 }
-} // mayLoad, hasExtraDefRegAllocReq
+} // mayLoad, neverHasSideEffects, hasExtraDefRegAllocReq
 
-let mayStore = 1, hasExtraSrcRegAllocReq = 1 in {
+let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
 def VSTMD : AXDI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$srcs,
                            variable_ops), IndexModeNone, IIC_fpStorem,
                   "vstm${addr:submode}${p}\t${addr:base}, $srcs", "", []> {
@@ -134,7 +134,7 @@ def VSTMS_UPD : AXSI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p,
                       "$addr.base = $wb", []> {
   let Inst{20} = 0;
 }
-} // mayStore, hasExtraSrcRegAllocReq
+} // mayStore, neverHasSideEffects, hasExtraSrcRegAllocReq
 
 // FLDMX, FSTMX - mixing S/D registers for pre-armv6 cores
 
@@ -313,6 +313,7 @@ def VMOVSR : AVConv4I<0b11100000, 0b1010, (outs SPR:$dst), (ins GPR:$src),
                  IIC_fpMOVIS, "vmov", "\t$dst, $src",
                  [(set SPR:$dst, (bitconvert GPR:$src))]>;
 
+let neverHasSideEffects = 1 in {
 def VMOVRRD  : AVConv3I<0b11000101, 0b1011,
                       (outs GPR:$wb, GPR:$dst2), (ins DPR:$src),
                  IIC_fpMOVDI, "vmov", "\t$wb, $dst2, $src",
@@ -326,6 +327,7 @@ def VMOVRRS  : AVConv3I<0b11000101, 0b1010,
                  [/* For disassembly only; pattern left blank */]> {
   let Inst{7-6} = 0b00;
 }
+} // neverHasSideEffects
 
 // FMDHR: GPR -> SPR
 // FMDLR: GPR -> SPR
@@ -337,6 +339,7 @@ def VMOVDRR : AVConv5I<0b11000100, 0b1011,
   let Inst{7-6} = 0b00;
 }
 
+let neverHasSideEffects = 1 in
 def VMOVSRR : AVConv5I<0b11000100, 0b1010,
                      (outs SPR:$dst1, SPR:$dst2), (ins GPR:$src1, GPR:$src2),
                 IIC_fpMOVID, "vmov", "\t$dst1, $dst2, $src1, $src2",
@@ -606,6 +609,7 @@ def VNMLAS : ASbI<0b11100, 0b01, 1, 0,
 // FP Conditional moves.
 //
 
+let neverHasSideEffects = 1 in {
 def VMOVDcc  : ADuI<0b11101, 0b11, 0b0000, 0b01, 0,
                     (outs DPR:$dst), (ins DPR:$false, DPR:$true),
                     IIC_fpUNA64, "vmov", ".f64\t$dst, $true",
@@ -629,7 +633,7 @@ def VNEGScc  : ASuI<0b11101, 0b11, 0b0001, 0b01, 0,
                     IIC_fpUNA32, "vneg", ".f32\t$dst, $true",
                 [/*(set SPR:$dst, (ARMcneg SPR:$false, SPR:$true, imm:$cc))*/]>,
                     RegConstraint<"$false = $dst">;
-
+} // neverHasSideEffects
 
 //===----------------------------------------------------------------------===//
 // Misc.
@@ -651,6 +655,7 @@ def FMSTAT : VFPAI<(outs), (ins), VFPMiscFrm, IIC_fpSTAT, "vmrs",
 
 // FPSCR <-> GPR (for disassembly only)
 
+let neverHasSideEffects = 1 in {
 let Uses = [FPSCR] in {
 def VMRS : VFPAI<(outs GPR:$dst), (ins), VFPMiscFrm, IIC_fpSTAT, "vmrs",
                  "\t$dst, fpscr",
@@ -674,6 +679,7 @@ def VMSR : VFPAI<(outs), (ins GPR:$src), VFPMiscFrm, IIC_fpSTAT, "vmsr",
   let Inst{4}     = 1;
 }
 }
+} // neverHasSideEffects
 
 // Materialize FP immediates. VFP3 only.
 let isReMaterializable = 1 in {
diff --git a/lib/Target/ARM/ARMJITInfo.cpp b/lib/Target/ARM/ARMJITInfo.cpp
index b31a4fa..5f6d7ee 100644
--- a/lib/Target/ARM/ARMJITInfo.cpp
+++ b/lib/Target/ARM/ARMJITInfo.cpp
@@ -318,6 +318,18 @@ void ARMJITInfo::relocate(void *Function, MachineRelocation *MR,
       *((intptr_t*)RelocPos) |= ResultPtr;
       break;
     }
+    case ARM::reloc_arm_movw: {
+      ResultPtr = ResultPtr & 0xFFFF; 
+      *((intptr_t*)RelocPos) |= ResultPtr & 0xFFF;
+      *((intptr_t*)RelocPos) |= ((ResultPtr >> 12) & 0xF) << 16;
+      break;
+    }
+    case ARM::reloc_arm_movt: {
+      ResultPtr = (ResultPtr >> 16) & 0xFFFF; 
+      *((intptr_t*)RelocPos) |= ResultPtr & 0xFFF;
+      *((intptr_t*)RelocPos) |= ((ResultPtr >> 12) & 0xF) << 16;
+      break;
+    }
     }
   }
 }
diff --git a/lib/Target/ARM/ARMRegisterInfo.h b/lib/Target/ARM/ARMRegisterInfo.h
index 041afd0..8edfb9a 100644
--- a/lib/Target/ARM/ARMRegisterInfo.h
+++ b/lib/Target/ARM/ARMRegisterInfo.h
@@ -23,16 +23,6 @@ namespace llvm {
   class ARMBaseInstrInfo;
   class Type;
 
-namespace ARM {
-  /// SubregIndex - The index of various subregister classes. Note that 
-  /// these indices must be kept in sync with the class indices in the 
-  /// ARMRegisterInfo.td file.
-  enum SubregIndex {
-    SSUBREG_0 = 1, SSUBREG_1 = 2, SSUBREG_2 = 3, SSUBREG_3 = 4,
-    DSUBREG_0 = 5, DSUBREG_1 = 6
-  };
-}
-
 struct ARMRegisterInfo : public ARMBaseRegisterInfo {
 public:
   ARMRegisterInfo(const ARMBaseInstrInfo &tii, const ARMSubtarget &STI);
diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td
index 0d4200c..6beca8b 100644
--- a/lib/Target/ARM/ARMRegisterInfo.td
+++ b/lib/Target/ARM/ARMRegisterInfo.td
@@ -23,6 +23,44 @@ class ARMFReg<bits<6> num, string n> : Register<n> {
   let Namespace = "ARM";
 }
 
+// Subregister indices.
+let Namespace = "ARM" in {
+// Note: Code depends on these having consecutive numbers.
+def ssub_0  : SubRegIndex;
+def ssub_1  : SubRegIndex;
+def ssub_2  : SubRegIndex; // In a Q reg.
+def ssub_3  : SubRegIndex;
+def ssub_4  : SubRegIndex; // In a QQ reg.
+def ssub_5  : SubRegIndex;
+def ssub_6  : SubRegIndex;
+def ssub_7  : SubRegIndex;
+def ssub_8  : SubRegIndex; // In a QQQQ reg.
+def ssub_9  : SubRegIndex;
+def ssub_10 : SubRegIndex;
+def ssub_11 : SubRegIndex;
+def ssub_12 : SubRegIndex;
+def ssub_13 : SubRegIndex;
+def ssub_14 : SubRegIndex;
+def ssub_15 : SubRegIndex;
+
+def dsub_0 : SubRegIndex;
+def dsub_1 : SubRegIndex;
+def dsub_2 : SubRegIndex;
+def dsub_3 : SubRegIndex;
+def dsub_4 : SubRegIndex;
+def dsub_5 : SubRegIndex;
+def dsub_6 : SubRegIndex;
+def dsub_7 : SubRegIndex;
+
+def qsub_0 : SubRegIndex;
+def qsub_1 : SubRegIndex;
+def qsub_2 : SubRegIndex;
+def qsub_3 : SubRegIndex;
+
+def qqsub_0 : SubRegIndex;
+def qqsub_1 : SubRegIndex;
+}
+
 // Integer registers
 def R0  : ARMReg< 0, "r0">,  DwarfRegNum<[0]>;
 def R1  : ARMReg< 1, "r1">,  DwarfRegNum<[1]>;
@@ -58,9 +96,9 @@ def S24 : ARMFReg<24, "s24">; def S25 : ARMFReg<25, "s25">;
 def S26 : ARMFReg<26, "s26">; def S27 : ARMFReg<27, "s27">;
 def S28 : ARMFReg<28, "s28">; def S29 : ARMFReg<29, "s29">;
 def S30 : ARMFReg<30, "s30">; def S31 : ARMFReg<31, "s31">;
-def SDummy : ARMFReg<63, "sINVALID">;
 
 // Aliases of the F* registers used to hold 64-bit fp values (doubles)
+let SubRegIndices = [ssub_0, ssub_1] in {
 def D0  : ARMReg< 0,  "d0", [S0,   S1]>;
 def D1  : ARMReg< 1,  "d1", [S2,   S3]>;
 def D2  : ARMReg< 2,  "d2", [S4,   S5]>;
@@ -77,6 +115,7 @@ def D12 : ARMReg<12, "d12", [S24, S25]>;
 def D13 : ARMReg<13, "d13", [S26, S27]>;
 def D14 : ARMReg<14, "d14", [S28, S29]>;
 def D15 : ARMReg<15, "d15", [S30, S31]>;
+}
 
 // VFP3 defines 16 additional double registers
 def D16 : ARMFReg<16, "d16">; def D17 : ARMFReg<17, "d17">;
@@ -89,6 +128,9 @@ def D28 : ARMFReg<28, "d28">; def D29 : ARMFReg<29, "d29">;
 def D30 : ARMFReg<30, "d30">; def D31 : ARMFReg<31, "d31">;
 
 // Advanced SIMD (NEON) defines 16 quad-word aliases
+let SubRegIndices = [dsub_0, dsub_1],
+ CompositeIndices = [(ssub_2 dsub_1, ssub_0),
+                     (ssub_3 dsub_1, ssub_1)] in {
 def Q0  : ARMReg< 0,  "q0", [D0,   D1]>;
 def Q1  : ARMReg< 1,  "q1", [D2,   D3]>;
 def Q2  : ARMReg< 2,  "q2", [D4,   D5]>;
@@ -97,6 +139,8 @@ def Q4  : ARMReg< 4,  "q4", [D8,   D9]>;
 def Q5  : ARMReg< 5,  "q5", [D10, D11]>;
 def Q6  : ARMReg< 6,  "q6", [D12, D13]>;
 def Q7  : ARMReg< 7,  "q7", [D14, D15]>;
+}
+let SubRegIndices = [dsub_0, dsub_1] in {
 def Q8  : ARMReg< 8,  "q8", [D16, D17]>;
 def Q9  : ARMReg< 9,  "q9", [D18, D19]>;
 def Q10 : ARMReg<10, "q10", [D20, D21]>;
@@ -105,6 +149,51 @@ def Q12 : ARMReg<12, "q12", [D24, D25]>;
 def Q13 : ARMReg<13, "q13", [D26, D27]>;
 def Q14 : ARMReg<14, "q14", [D28, D29]>;
 def Q15 : ARMReg<15, "q15", [D30, D31]>;
+}
+
+// Pseudo 256-bit registers to represent pairs of Q registers. These should
+// never be present in the emitted code.
+// These are used for NEON load / store instructions, e.g. vld4, vst3.
+// NOTE: It's possible to define more QQ registers since technical the
+// starting D register number doesn't have to be multiple of 4. e.g. 
+// D1, D2, D3, D4 would be a legal quad. But that would make the sub-register
+// stuffs very messy.
+let SubRegIndices = [qsub_0, qsub_1] in {
+let CompositeIndices = [(dsub_2 qsub_1, dsub_0), (dsub_3 qsub_1, dsub_1),
+                        (ssub_4 qsub_1, ssub_0), (ssub_5 qsub_1, ssub_1),
+                        (ssub_6 qsub_1, ssub_2), (ssub_7 qsub_1, ssub_3)] in {
+def QQ0 : ARMReg<0, "qq0", [Q0,  Q1]>;
+def QQ1 : ARMReg<1, "qq1", [Q2,  Q3]>;
+def QQ2 : ARMReg<2, "qq2", [Q4,  Q5]>;
+def QQ3 : ARMReg<3, "qq3", [Q6,  Q7]>;
+}
+let CompositeIndices = [(dsub_2 qsub_1, dsub_0), (dsub_3 qsub_1, dsub_1)] in {
+def QQ4 : ARMReg<4, "qq4", [Q8,  Q9]>;
+def QQ5 : ARMReg<5, "qq5", [Q10, Q11]>;
+def QQ6 : ARMReg<6, "qq6", [Q12, Q13]>;
+def QQ7 : ARMReg<7, "qq7", [Q14, Q15]>;
+}
+}
+
+// Pseudo 512-bit registers to represent four consecutive Q registers.
+let SubRegIndices = [qqsub_0, qqsub_1] in {
+let CompositeIndices = [(qsub_2  qqsub_1, qsub_0), (qsub_3  qqsub_1, qsub_1),
+                        (dsub_4  qqsub_1, dsub_0), (dsub_5  qqsub_1, dsub_1),
+                        (dsub_6  qqsub_1, dsub_2), (dsub_7  qqsub_1, dsub_3),
+                        (ssub_8  qqsub_1, ssub_0), (ssub_9  qqsub_1, ssub_1),
+                        (ssub_10 qqsub_1, ssub_2), (ssub_11 qqsub_1, ssub_3),
+                        (ssub_12 qqsub_1, ssub_4), (ssub_13 qqsub_1, ssub_5),
+                        (ssub_14 qqsub_1, ssub_6), (ssub_15 qqsub_1, ssub_7)] in {
+def QQQQ0 : ARMReg<0, "qqqq0", [QQ0, QQ1]>;
+def QQQQ1 : ARMReg<1, "qqqq1", [QQ2, QQ3]>;
+}
+let CompositeIndices = [(qsub_2 qqsub_1, qsub_0), (qsub_3 qqsub_1, qsub_1),
+                        (dsub_4 qqsub_1, dsub_0), (dsub_5 qqsub_1, dsub_1),
+                        (dsub_6 qqsub_1, dsub_2), (dsub_7 qqsub_1, dsub_3)] in {
+def QQQQ2 : ARMReg<2, "qqqq2", [QQ4, QQ5]>;
+def QQQQ3 : ARMReg<3, "qqqq3", [QQ6, QQ7]>;
+}
+}
 
 // Current Program Status Register.
 def CPSR  : ARMReg<0, "cpsr">;
@@ -270,11 +359,6 @@ def SPR_8 : RegisterClass<"ARM", [f32], 32,
                           [S0, S1,  S2,  S3,  S4,  S5,  S6,  S7,
                            S8, S9, S10, S11, S12, S13, S14, S15]>;
 
-// Dummy f32 regclass to represent impossible subreg indices.
-def SPR_INVALID : RegisterClass<"ARM", [f32], 32, [SDummy]> {
-  let CopyCost = -1;
-}
-
 // Scalar double precision floating point / generic 64-bit vector register
 // class.
 // ARM requires only word alignment for double. It's more performant if it
@@ -284,7 +368,6 @@ def DPR : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64,
                          D8,  D9,  D10, D11, D12, D13, D14, D15,
                          D16, D17, D18, D19, D20, D21, D22, D23,
                          D24, D25, D26, D27, D28, D29, D30, D31]> {
-  let SubRegClassList = [SPR_INVALID, SPR_INVALID];
   let MethodProtos = [{
     iterator allocation_order_begin(const MachineFunction &MF) const;
     iterator allocation_order_end(const MachineFunction &MF) const;
@@ -332,79 +415,68 @@ def DPR : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64,
 def DPR_VFP2 : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64,
                              [D0,  D1,  D2,  D3,  D4,  D5,  D6,  D7,
                               D8,  D9,  D10, D11, D12, D13, D14, D15]> {
-  let SubRegClassList = [SPR, SPR];
+  let SubRegClasses = [(SPR ssub_0, ssub_1)];
 }
 
 // Subset of DPR which can be used as a source of NEON scalars for 16-bit
 // operations
 def DPR_8 : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64,
                           [D0,  D1,  D2,  D3,  D4,  D5,  D6,  D7]> {
-  let SubRegClassList = [SPR_8, SPR_8];
+  let SubRegClasses = [(SPR_8 ssub_0, ssub_1)];
 }
 
 // Generic 128-bit vector register class.
 def QPR : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], 128,
                         [Q0,  Q1,  Q2,  Q3,  Q4,  Q5,  Q6,  Q7,
                          Q8,  Q9,  Q10, Q11, Q12, Q13, Q14, Q15]> {
-  let SubRegClassList = [SPR_INVALID, SPR_INVALID, SPR_INVALID, SPR_INVALID,
-                         DPR, DPR];
+  let SubRegClasses = [(DPR dsub_0, dsub_1)];
 }
 
 // Subset of QPR that have 32-bit SPR subregs.
 def QPR_VFP2 : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
                              128,
                              [Q0,  Q1,  Q2,  Q3,  Q4,  Q5,  Q6,  Q7]> {
-  let SubRegClassList = [SPR, SPR, SPR, SPR, DPR_VFP2, DPR_VFP2];
+  let SubRegClasses = [(SPR      ssub_0, ssub_1, ssub_2, ssub_3),
+                       (DPR_VFP2 dsub_0, dsub_1)];
 }
 
 // Subset of QPR that have DPR_8 and SPR_8 subregs.
 def QPR_8 : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
                            128,
                            [Q0,  Q1,  Q2,  Q3]> {
-  let SubRegClassList = [SPR_8, SPR_8, SPR_8, SPR_8, DPR_8, DPR_8];
+  let SubRegClasses = [(SPR_8 ssub_0, ssub_1, ssub_2, ssub_3),
+                       (DPR_8 dsub_0, dsub_1)];
+}
+
+// Pseudo 256-bit vector register class to model pairs of Q registers
+// (4 consecutive D registers).
+def QQPR : RegisterClass<"ARM", [v4i64],
+                         256,
+                         [QQ0, QQ1, QQ2, QQ3, QQ4, QQ5, QQ6, QQ7]> {
+  let SubRegClasses = [(DPR dsub_0, dsub_1, dsub_2, dsub_3),
+                       (QPR qsub_0, qsub_1)];
+}
+
+// Subset of QQPR that have 32-bit SPR subregs.
+def QQPR_VFP2 : RegisterClass<"ARM", [v4i64],
+                              256,
+                              [QQ0, QQ1, QQ2, QQ3]> {
+  let SubRegClasses = [(SPR      ssub_0, ssub_1, ssub_2, ssub_3),
+                       (DPR_VFP2 dsub_0, dsub_1, dsub_2, dsub_3),
+                       (QPR_VFP2 qsub_0, qsub_1)];
+
+}
+
+// Pseudo 512-bit vector register class to model 4 consecutive Q registers
+// (8 consecutive D registers).
+def QQQQPR : RegisterClass<"ARM", [v8i64],
+                         256,
+                         [QQQQ0, QQQQ1, QQQQ2, QQQQ3]> {
+  let SubRegClasses = [(DPR dsub_0, dsub_1, dsub_2, dsub_3,
+                            dsub_4, dsub_5, dsub_6, dsub_7),
+                       (QPR qsub_0, qsub_1, qsub_2, qsub_3)];
 }
 
 // Condition code registers.
 def CCR : RegisterClass<"ARM", [i32], 32, [CPSR]>;
 
-//===----------------------------------------------------------------------===//
-// Subregister Set Definitions... now that we have all of the pieces, define the
-// sub registers for each register.
-//
-
-def arm_ssubreg_0 : PatLeaf<(i32 1)>;
-def arm_ssubreg_1 : PatLeaf<(i32 2)>;
-def arm_ssubreg_2 : PatLeaf<(i32 3)>;
-def arm_ssubreg_3 : PatLeaf<(i32 4)>;
-def arm_dsubreg_0 : PatLeaf<(i32 5)>;
-def arm_dsubreg_1 : PatLeaf<(i32 6)>;
-
-// S sub-registers of D registers.
-def : SubRegSet<1, [D0,  D1,  D2,  D3,  D4,  D5,  D6,  D7,
-                    D8,  D9,  D10, D11, D12, D13, D14, D15],
-                   [S0,  S2,  S4,  S6,  S8,  S10, S12, S14,
-                    S16, S18, S20, S22, S24, S26, S28, S30]>;
-def : SubRegSet<2, [D0,  D1,  D2,  D3,  D4,  D5,  D6,  D7,
-                    D8,  D9,  D10, D11, D12, D13, D14, D15],
-                   [S1,  S3,  S5,  S7,  S9,  S11, S13, S15,
-                    S17, S19, S21, S23, S25, S27, S29, S31]>;
-
-// S sub-registers of Q registers.
-def : SubRegSet<1, [Q0,  Q1,  Q2,  Q3,  Q4,  Q5,  Q6,  Q7],
-                   [S0,  S4,  S8,  S12, S16, S20, S24, S28]>;
-def : SubRegSet<2, [Q0,  Q1,  Q2,  Q3,  Q4,  Q5,  Q6,  Q7],
-                   [S1,  S5,  S9,  S13, S17, S21, S25, S29]>;
-def : SubRegSet<3, [Q0,  Q1,  Q2,  Q3,  Q4,  Q5,  Q6,  Q7],
-                   [S2,  S6,  S10, S14, S18, S22, S26, S30]>;
-def : SubRegSet<4, [Q0,  Q1,  Q2,  Q3,  Q4,  Q5,  Q6,  Q7],
-                   [S3,  S7,  S11, S15, S19, S23, S27, S31]>;
-
-// D sub-registers of Q registers.
-def : SubRegSet<5, [Q0,  Q1,  Q2,  Q3,  Q4,  Q5,  Q6,  Q7,
-                    Q8,  Q9,  Q10, Q11, Q12, Q13, Q14, Q15],
-                   [D0,  D2,  D4,  D6,  D8,  D10, D12, D14,
-                    D16, D18, D20, D22, D24, D26, D28, D30]>;
-def : SubRegSet<6, [Q0,  Q1,  Q2,  Q3,  Q4,  Q5,  Q6,  Q7,
-                    Q8,  Q9,  Q10, Q11, Q12, Q13, Q14, Q15],
-                   [D1,  D3,  D5,  D7,  D9,  D11, D13, D15,
-                    D17, D19, D21, D23, D25, D27, D29, D31]>;
diff --git a/lib/Target/ARM/ARMRelocations.h b/lib/Target/ARM/ARMRelocations.h
index 2cc2950..86e7206 100644
--- a/lib/Target/ARM/ARMRelocations.h
+++ b/lib/Target/ARM/ARMRelocations.h
@@ -47,7 +47,13 @@ namespace llvm {
       reloc_arm_pic_jt,
 
       // reloc_arm_branch - Branch address relocation.
-      reloc_arm_branch
+      reloc_arm_branch,
+
+      // reloc_arm_movt  - MOVT immediate relocation.
+      reloc_arm_movt,
+
+      // reloc_arm_movw  - MOVW immediate relocation.
+      reloc_arm_movw
     };
   }
 }
diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/lib/Target/ARM/ARMSelectionDAGInfo.cpp
index c04ee38..a289407 100644
--- a/lib/Target/ARM/ARMSelectionDAGInfo.cpp
+++ b/lib/Target/ARM/ARMSelectionDAGInfo.cpp
@@ -12,11 +12,123 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "arm-selectiondag-info"
-#include "ARMSelectionDAGInfo.h"
+#include "ARMTargetMachine.h"
 using namespace llvm;
 
-ARMSelectionDAGInfo::ARMSelectionDAGInfo() {
+ARMSelectionDAGInfo::ARMSelectionDAGInfo(const TargetMachine &TM)
+  : TargetSelectionDAGInfo(TM),
+    Subtarget(&TM.getSubtarget<ARMSubtarget>()) {
 }
 
 ARMSelectionDAGInfo::~ARMSelectionDAGInfo() {
 }
+
+SDValue
+ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
+                                             SDValue Chain,
+                                             SDValue Dst, SDValue Src,
+                                             SDValue Size, unsigned Align,
+                                             bool isVolatile, bool AlwaysInline,
+                                             const Value *DstSV,
+                                             uint64_t DstSVOff,
+                                             const Value *SrcSV,
+                                             uint64_t SrcSVOff) const {
+  // Do repeated 4-byte loads and stores. To be improved.
+  // This requires 4-byte alignment.
+  if ((Align & 3) != 0)
+    return SDValue();
+  // This requires the copy size to be a constant, preferrably
+  // within a subtarget-specific limit.
+  ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+  if (!ConstantSize)
+    return SDValue();
+  uint64_t SizeVal = ConstantSize->getZExtValue();
+  if (!AlwaysInline && SizeVal > Subtarget->getMaxInlineSizeThreshold())
+    return SDValue();
+
+  unsigned BytesLeft = SizeVal & 3;
+  unsigned NumMemOps = SizeVal >> 2;
+  unsigned EmittedNumMemOps = 0;
+  EVT VT = MVT::i32;
+  unsigned VTSize = 4;
+  unsigned i = 0;
+  const unsigned MAX_LOADS_IN_LDM = 6;
+  SDValue TFOps[MAX_LOADS_IN_LDM];
+  SDValue Loads[MAX_LOADS_IN_LDM];
+  uint64_t SrcOff = 0, DstOff = 0;
+
+  // Emit up to MAX_LOADS_IN_LDM loads, then a TokenFactor barrier, then the
+  // same number of stores.  The loads and stores will get combined into
+  // ldm/stm later on.
+  while (EmittedNumMemOps < NumMemOps) {
+    for (i = 0;
+         i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) {
+      Loads[i] = DAG.getLoad(VT, dl, Chain,
+                             DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
+                                         DAG.getConstant(SrcOff, MVT::i32)),
+                             SrcSV, SrcSVOff + SrcOff, isVolatile, false, 0);
+      TFOps[i] = Loads[i].getValue(1);
+      SrcOff += VTSize;
+    }
+    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
+
+    for (i = 0;
+         i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) {
+      TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
+                              DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
+                                          DAG.getConstant(DstOff, MVT::i32)),
+                              DstSV, DstSVOff + DstOff, isVolatile, false, 0);
+      DstOff += VTSize;
+    }
+    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
+
+    EmittedNumMemOps += i;
+  }
+
+  if (BytesLeft == 0)
+    return Chain;
+
+  // Issue loads / stores for the trailing (1 - 3) bytes.
+  unsigned BytesLeftSave = BytesLeft;
+  i = 0;
+  while (BytesLeft) {
+    if (BytesLeft >= 2) {
+      VT = MVT::i16;
+      VTSize = 2;
+    } else {
+      VT = MVT::i8;
+      VTSize = 1;
+    }
+
+    Loads[i] = DAG.getLoad(VT, dl, Chain,
+                           DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
+                                       DAG.getConstant(SrcOff, MVT::i32)),
+                           SrcSV, SrcSVOff + SrcOff, false, false, 0);
+    TFOps[i] = Loads[i].getValue(1);
+    ++i;
+    SrcOff += VTSize;
+    BytesLeft -= VTSize;
+  }
+  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
+
+  i = 0;
+  BytesLeft = BytesLeftSave;
+  while (BytesLeft) {
+    if (BytesLeft >= 2) {
+      VT = MVT::i16;
+      VTSize = 2;
+    } else {
+      VT = MVT::i8;
+      VTSize = 1;
+    }
+
+    TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
+                            DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
+                                        DAG.getConstant(DstOff, MVT::i32)),
+                            DstSV, DstSVOff + DstOff, false, false, 0);
+    ++i;
+    DstOff += VTSize;
+    BytesLeft -= VTSize;
+  }
+  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
+}
diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.h b/lib/Target/ARM/ARMSelectionDAGInfo.h
index afe9a47..d7d00c2 100644
--- a/lib/Target/ARM/ARMSelectionDAGInfo.h
+++ b/lib/Target/ARM/ARMSelectionDAGInfo.h
@@ -19,9 +19,24 @@
 namespace llvm {
 
 class ARMSelectionDAGInfo : public TargetSelectionDAGInfo {
+  /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
+  /// make the right decision when generating code for different targets.
+  const ARMSubtarget *Subtarget;
+
 public:
-  ARMSelectionDAGInfo();
+  explicit ARMSelectionDAGInfo(const TargetMachine &TM);
   ~ARMSelectionDAGInfo();
+
+  virtual
+  SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
+                                  SDValue Chain,
+                                  SDValue Dst, SDValue Src,
+                                  SDValue Size, unsigned Align,
+                                  bool isVolatile, bool AlwaysInline,
+                                  const Value *DstSV,
+                                  uint64_t DstSVOff,
+                                  const Value *SrcSV,
+                                  uint64_t SrcSVOff) const;
 };
 
 }
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index b11580a..10fd257 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -39,6 +39,8 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS,
   , IsR9Reserved(ReserveR9)
   , UseMovt(UseMOVT)
   , HasFP16(false)
+  , HasHardwareDivide(false)
+  , HasT2ExtractPack(false)
   , stackAlignment(4)
   , CPUString("generic")
   , TargetType(isELF) // Default to ELF unless otherwise specified.
@@ -73,6 +75,8 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS,
     unsigned SubVer = TT[Idx];
     if (SubVer >= '7' && SubVer <= '9') {
       ARMArchVersion = V7A;
+      if (Len >= Idx+2 && TT[Idx+1] == 'm')
+        ARMArchVersion = V7M;
     } else if (SubVer == '6') {
       ARMArchVersion = V6;
       if (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == '2')
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index 288a19a..8332bba 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -26,7 +26,7 @@ class GlobalValue;
 class ARMSubtarget : public TargetSubtarget {
 protected:
   enum ARMArchEnum {
-    V4, V4T, V5T, V5TE, V6, V6T2, V7A
+    V4, V4T, V5T, V5TE, V6, V6T2, V7A, V7M
   };
 
   enum ARMFPEnum {
@@ -39,7 +39,7 @@ protected:
   };
 
   /// ARMArchVersion - ARM architecture version: V4, V4T (base), V5T, V5TE,
-  /// V6, V6T2, V7A.
+  /// V6, V6T2, V7A, V7M.
   ARMArchEnum ARMArchVersion;
 
   /// ARMFPUType - Floating Point Unit type.
@@ -74,6 +74,13 @@ protected:
   /// only so far)
   bool HasFP16;
 
+  /// HasHardwareDivide - True if subtarget supports [su]div
+  bool HasHardwareDivide;
+
+  /// HasT2ExtractPack - True if subtarget supports thumb2 extract/pack
+  /// instructions.
+  bool HasT2ExtractPack;
+
   /// stackAlignment - The minimum alignment known to hold of the stack frame on
   /// entry to the function and which must be maintained by every function.
   unsigned stackAlignment;
@@ -123,6 +130,8 @@ protected:
   bool hasNEON() const { return ARMFPUType >= NEON;  }
   bool useNEONForSinglePrecisionFP() const {
     return hasNEON() && UseNEONForSinglePrecisionFP; }
+  bool hasDivide() const { return HasHardwareDivide; }
+  bool hasT2ExtractPack() const { return HasT2ExtractPack; }
   bool useVMLx() const {return hasVFP2() && !SlowVMLx; }
 
   bool hasFP16() const { return HasFP16; }
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index 662e61e..b4a9252 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -62,7 +62,8 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, const std::string &TT,
     DataLayout(Subtarget.isAPCS_ABI() ?
                std::string("e-p:32:32-f64:32:32-i64:32:32-n32") :
                std::string("e-p:32:32-f64:64:64-i64:64:64-n32")),
-    TLInfo(*this) {
+    TLInfo(*this),
+    TSInfo(*this) {
 }
 
 ThumbTargetMachine::ThumbTargetMachine(const Target &T, const std::string &TT,
@@ -76,7 +77,8 @@ ThumbTargetMachine::ThumbTargetMachine(const Target &T, const std::string &TT,
                            "i16:16:32-i8:8:32-i1:8:32-a:0:32-n32") :
                std::string("e-p:32:32-f64:64:64-i64:64:64-"
                            "i16:16:32-i8:8:32-i1:8:32-a:0:32-n32")),
-    TLInfo(*this) {
+    TLInfo(*this),
+    TSInfo(*this) {
 }
 
 
diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h
index 4e205df..a222e57 100644
--- a/lib/Target/ARM/ARMTargetMachine.h
+++ b/lib/Target/ARM/ARMTargetMachine.h
@@ -21,6 +21,7 @@
 #include "ARMJITInfo.h"
 #include "ARMSubtarget.h"
 #include "ARMISelLowering.h"
+#include "ARMSelectionDAGInfo.h"
 #include "Thumb1InstrInfo.h"
 #include "Thumb2InstrInfo.h"
 #include "llvm/ADT/OwningPtr.h"
@@ -63,6 +64,7 @@ class ARMTargetMachine : public ARMBaseTargetMachine {
   ARMInstrInfo        InstrInfo;
   const TargetData    DataLayout;       // Calculates type size & alignment
   ARMTargetLowering   TLInfo;
+  ARMSelectionDAGInfo TSInfo;
 public:
   ARMTargetMachine(const Target &T, const std::string &TT,
                    const std::string &FS);
@@ -75,6 +77,10 @@ public:
     return &TLInfo;
   }
 
+  virtual const ARMSelectionDAGInfo* getSelectionDAGInfo() const {
+    return &TSInfo;
+  }
+
   virtual const ARMInstrInfo     *getInstrInfo() const { return &InstrInfo; }
   virtual const TargetData       *getTargetData() const { return &DataLayout; }
 };
@@ -88,6 +94,7 @@ class ThumbTargetMachine : public ARMBaseTargetMachine {
   OwningPtr<ARMBaseInstrInfo> InstrInfo;
   const TargetData    DataLayout;   // Calculates type size & alignment
   ARMTargetLowering   TLInfo;
+  ARMSelectionDAGInfo TSInfo;
 public:
   ThumbTargetMachine(const Target &T, const std::string &TT,
                      const std::string &FS);
@@ -101,6 +108,10 @@ public:
     return &TLInfo;
   }
 
+  virtual const ARMSelectionDAGInfo *getSelectionDAGInfo() const {
+    return &TSInfo;
+  }
+
   /// returns either Thumb1InstrInfo or Thumb2InstrInfo
   virtual const ARMBaseInstrInfo *getInstrInfo() const {
     return InstrInfo.get();
diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
index 80a9d2d..d95efdb 100644
--- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
@@ -319,16 +319,16 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
     unsigned Reg = MO.getReg();
     assert(TargetRegisterInfo::isPhysicalRegister(Reg));
     if (Modifier && strcmp(Modifier, "dregpair") == 0) {
-      unsigned DRegLo = TM.getRegisterInfo()->getSubReg(Reg, 5);// arm_dsubreg_0
-      unsigned DRegHi = TM.getRegisterInfo()->getSubReg(Reg, 6);// arm_dsubreg_1
+      unsigned DRegLo = TM.getRegisterInfo()->getSubReg(Reg, ARM::dsub_0);
+      unsigned DRegHi = TM.getRegisterInfo()->getSubReg(Reg, ARM::dsub_1);
       O << '{'
         << getRegisterName(DRegLo) << ',' << getRegisterName(DRegHi)
         << '}';
     } else if (Modifier && strcmp(Modifier, "lane") == 0) {
       unsigned RegNum = ARMRegisterInfo::getRegisterNumbering(Reg);
       unsigned DReg =
-        TM.getRegisterInfo()->getMatchingSuperReg(Reg, RegNum & 1 ? 2 : 1,
-                                                  &ARM::DPR_VFP2RegClass);
+        TM.getRegisterInfo()->getMatchingSuperReg(Reg,
+          RegNum & 1 ? ARM::ssub_1 : ARM::ssub_0, &ARM::DPR_VFP2RegClass);
       O << getRegisterName(DReg) << '[' << (RegNum & 1) << ']';
     } else {
       assert(!MO.getSubReg() && "Subregs should be eliminated!");
@@ -1375,13 +1375,32 @@ void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) {
   case ARM::MOVi32imm: { // FIXME: Remove asmstring from td file.
     // This is a hack that lowers as a two instruction sequence.
     unsigned DstReg = MI->getOperand(0).getReg();
-    unsigned ImmVal = (unsigned)MI->getOperand(1).getImm();
-    
+    const MachineOperand &MO = MI->getOperand(1);
+    MCOperand V1, V2;
+    if (MO.isImm()) {
+      unsigned ImmVal = (unsigned)MI->getOperand(1).getImm();
+      V1 = MCOperand::CreateImm(ImmVal & 65535);
+      V2 = MCOperand::CreateImm(ImmVal >> 16);
+    } else if (MO.isGlobal()) {
+      MCSymbol *Symbol = MCInstLowering.GetGlobalAddressSymbol(MO);
+      const MCSymbolRefExpr *SymRef1 =
+	MCSymbolRefExpr::Create(Symbol,
+				MCSymbolRefExpr::VK_ARM_LO16, OutContext);
+      const MCSymbolRefExpr *SymRef2 =
+	MCSymbolRefExpr::Create(Symbol,
+				MCSymbolRefExpr::VK_ARM_HI16, OutContext);
+      V1 = MCOperand::CreateExpr(SymRef1);
+      V2 = MCOperand::CreateExpr(SymRef2);
+    } else {
+      MI->dump();
+      llvm_unreachable("cannot handle this operand");
+    }
+
     {
       MCInst TmpInst;
       TmpInst.setOpcode(ARM::MOVi16);
       TmpInst.addOperand(MCOperand::CreateReg(DstReg));         // dstreg
-      TmpInst.addOperand(MCOperand::CreateImm(ImmVal & 65535)); // lower16(imm)
+      TmpInst.addOperand(V1); // lower16(imm)
       
       // Predicate.
       TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(2).getImm()));
@@ -1395,7 +1414,7 @@ void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) {
       TmpInst.setOpcode(ARM::MOVTi16);
       TmpInst.addOperand(MCOperand::CreateReg(DstReg));         // dstreg
       TmpInst.addOperand(MCOperand::CreateReg(DstReg));         // srcreg
-      TmpInst.addOperand(MCOperand::CreateImm(ImmVal >> 16));   // upper16(imm)
+      TmpInst.addOperand(V2);   // upper16(imm)
       
       // Predicate.
       TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(2).getImm()));
diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp
index ac6331f..2b94b76 100644
--- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp
+++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp
@@ -195,8 +195,8 @@ void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
       // FIXME: Breaks e.g. ARM/vmul.ll.
       assert(0);
       /*
-      unsigned DRegLo = TRI->getSubReg(Reg, 5); // arm_dsubreg_0
-      unsigned DRegHi = TRI->getSubReg(Reg, 6); // arm_dsubreg_1
+      unsigned DRegLo = TRI->getSubReg(Reg, ARM::dsub_0);
+      unsigned DRegHi = TRI->getSubReg(Reg, ARM::dsub_1);
       O << '{'
       << getRegisterName(DRegLo) << ',' << getRegisterName(DRegHi)
       << '}';*/
@@ -217,7 +217,8 @@ void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
            ((Modifier == 0 || Modifier[0] == 0) && "No modifiers supported"));
     O << '#' << Op.getImm();
   } else {
-    assert((Modifier == 0 || Modifier[0] == 0) && "No modifiers supported");
+    if (Modifier && Modifier[0] != 0 && strcmp(Modifier, "call") != 0)
+      llvm_unreachable("Unsupported modifier");
     assert(Op.isExpr() && "unknown operand kind in printOperand");
     O << *Op.getExpr();
   }
diff --git a/lib/Target/ARM/AsmPrinter/ARMMCInstLower.h b/lib/Target/ARM/AsmPrinter/ARMMCInstLower.h
index 383d30d..b81a306 100644
--- a/lib/Target/ARM/AsmPrinter/ARMMCInstLower.h
+++ b/lib/Target/ARM/AsmPrinter/ARMMCInstLower.h
@@ -26,7 +26,7 @@ namespace llvm {
   //class ARMSubtarget;
   
 /// ARMMCInstLower - This class is used to lower an MachineInstr into an MCInst.
-class VISIBILITY_HIDDEN ARMMCInstLower {
+class LLVM_LIBRARY_VISIBILITY ARMMCInstLower {
   MCContext &Ctx;
   Mangler &Mang;
   AsmPrinter &Printer;
diff --git a/lib/Target/ARM/NEONMoveFix.cpp b/lib/Target/ARM/NEONMoveFix.cpp
index 3c0414d..0a4400c 100644
--- a/lib/Target/ARM/NEONMoveFix.cpp
+++ b/lib/Target/ARM/NEONMoveFix.cpp
@@ -118,7 +118,7 @@ bool NEONMoveFixPass::runOnMachineFunction(MachineFunction &Fn) {
   ARMFunctionInfo *AFI = Fn.getInfo<ARMFunctionInfo>();
   const TargetMachine &TM = Fn.getTarget();
 
-  if (AFI->isThumbFunction())
+  if (AFI->isThumb1OnlyFunction())
     return false;
 
   TRI = TM.getRegisterInfo();
diff --git a/lib/Target/ARM/NEONPreAllocPass.cpp b/lib/Target/ARM/NEONPreAllocPass.cpp
index ef6bf3a..a725898 100644
--- a/lib/Target/ARM/NEONPreAllocPass.cpp
+++ b/lib/Target/ARM/NEONPreAllocPass.cpp
@@ -33,7 +33,8 @@ namespace {
 
   private:
     bool FormsRegSequence(MachineInstr *MI,
-                          unsigned FirstOpnd, unsigned NumRegs);
+                          unsigned FirstOpnd, unsigned NumRegs,
+                          unsigned Offset, unsigned Stride) const;
     bool PreAllocNEONRegisters(MachineBasicBlock &MBB);
   };
 
@@ -338,24 +339,122 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
   return false;
 }
 
-bool NEONPreAllocPass::FormsRegSequence(MachineInstr *MI,
-                                        unsigned FirstOpnd, unsigned NumRegs) {
-  MachineInstr *RegSeq = 0;
+bool
+NEONPreAllocPass::FormsRegSequence(MachineInstr *MI,
+                                   unsigned FirstOpnd, unsigned NumRegs,
+                                   unsigned Offset, unsigned Stride) const {
+  MachineOperand &FMO = MI->getOperand(FirstOpnd);
+  assert(FMO.isReg() && FMO.getSubReg() == 0 && "unexpected operand");
+  unsigned VirtReg = FMO.getReg();
+  (void)VirtReg;
+  assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
+         "expected a virtual register");
+
+  unsigned LastSubIdx = 0;
+  if (FMO.isDef()) {
+    MachineInstr *RegSeq = 0;
+    for (unsigned R = 0; R < NumRegs; ++R) {
+      const MachineOperand &MO = MI->getOperand(FirstOpnd + R);
+      assert(MO.isReg() && MO.getSubReg() == 0 && "unexpected operand");
+      unsigned VirtReg = MO.getReg();
+      assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
+             "expected a virtual register");
+      // Feeding into a REG_SEQUENCE.
+      if (!MRI->hasOneNonDBGUse(VirtReg))
+        return false;
+      MachineInstr *UseMI = &*MRI->use_nodbg_begin(VirtReg);
+      if (!UseMI->isRegSequence())
+        return false;
+      if (RegSeq && RegSeq != UseMI)
+        return false;
+      unsigned OpIdx = 1 + (Offset + R * Stride) * 2;
+      if (UseMI->getOperand(OpIdx).getReg() != VirtReg)
+        llvm_unreachable("Malformed REG_SEQUENCE instruction!");
+      unsigned SubIdx = UseMI->getOperand(OpIdx + 1).getImm();
+      if (LastSubIdx) {
+        if (LastSubIdx != SubIdx-Stride)
+          return false;
+      } else {
+        // Must start from dsub_0 or qsub_0.
+        if (SubIdx != (ARM::dsub_0+Offset) &&
+            SubIdx != (ARM::qsub_0+Offset))
+          return false;
+      }
+      RegSeq = UseMI;
+      LastSubIdx = SubIdx;
+    }
+
+    // In the case of vld3, etc., make sure the trailing operand of
+    // REG_SEQUENCE is an undef.
+    if (NumRegs == 3) {
+      unsigned OpIdx = 1 + (Offset + 3 * Stride) * 2;
+      const MachineOperand &MO = RegSeq->getOperand(OpIdx);
+      unsigned VirtReg = MO.getReg();
+      MachineInstr *DefMI = MRI->getVRegDef(VirtReg);
+      if (!DefMI || !DefMI->isImplicitDef())
+        return false;
+    }
+    return true;
+  }
+
+  unsigned LastSrcReg = 0;
+  SmallVector<unsigned, 4> SubIds;
   for (unsigned R = 0; R < NumRegs; ++R) {
-    MachineOperand &MO = MI->getOperand(FirstOpnd + R);
+    const MachineOperand &MO = MI->getOperand(FirstOpnd + R);
     assert(MO.isReg() && MO.getSubReg() == 0 && "unexpected operand");
     unsigned VirtReg = MO.getReg();
     assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
            "expected a virtual register");
-    if (!MRI->hasOneNonDBGUse(VirtReg))
+    // Extracting from a Q or QQ register.
+    MachineInstr *DefMI = MRI->getVRegDef(VirtReg);
+    if (!DefMI || !DefMI->isExtractSubreg())
       return false;
-    MachineInstr *UseMI = &*MRI->use_nodbg_begin(VirtReg);
-    if (UseMI->getOpcode() != TargetOpcode::REG_SEQUENCE)
+    VirtReg = DefMI->getOperand(1).getReg();
+    if (LastSrcReg && LastSrcReg != VirtReg)
       return false;
-    if (RegSeq && RegSeq != UseMI)
+    LastSrcReg = VirtReg;
+    const TargetRegisterClass *RC = MRI->getRegClass(VirtReg);
+    if (RC != ARM::QPRRegisterClass &&
+        RC != ARM::QQPRRegisterClass &&
+        RC != ARM::QQQQPRRegisterClass)
       return false;
-    RegSeq = UseMI;
+    unsigned SubIdx = DefMI->getOperand(2).getImm();
+    if (LastSubIdx) {
+      if (LastSubIdx != SubIdx-Stride)
+        return false;
+    } else {
+      // Must start from dsub_0 or qsub_0.
+      if (SubIdx != (ARM::dsub_0+Offset) &&
+          SubIdx != (ARM::qsub_0+Offset))
+        return false;
+    }
+    SubIds.push_back(SubIdx);
+    LastSubIdx = SubIdx;
   }
+
+  // FIXME: Update the uses of EXTRACT_SUBREG from REG_SEQUENCE is
+  // currently required for correctness. e.g.
+  //  %reg1041;<def> = REG_SEQUENCE %reg1040<kill>, 5, %reg1035<kill>, 6
+  //  %reg1042<def> = EXTRACT_SUBREG %reg1041, 6
+  //  %reg1043<def> = EXTRACT_SUBREG %reg1041, 5
+  //  VST1q16 %reg1025<kill>, 0, %reg1043<kill>, %reg1042<kill>,
+  // reg1025 and reg1043 should be replaced with reg1041:6 and reg1041:5
+  // respectively.
+  // We need to change how we model uses of REG_SEQUENCE.
+  for (unsigned R = 0; R < NumRegs; ++R) {
+    MachineOperand &MO = MI->getOperand(FirstOpnd + R);
+    unsigned OldReg = MO.getReg();
+    MachineInstr *DefMI = MRI->getVRegDef(OldReg);
+    assert(DefMI->isExtractSubreg());
+    MO.setReg(LastSrcReg);
+    MO.setSubReg(SubIds[R]);
+    if (R != 0)
+      MO.setIsKill(false);
+    // Delete the EXTRACT_SUBREG if its result is now dead.
+    if (MRI->use_empty(OldReg))
+      DefMI->eraseFromParent();
+  }
+
   return true;
 }
 
@@ -368,7 +467,8 @@ bool NEONPreAllocPass::PreAllocNEONRegisters(MachineBasicBlock &MBB) {
     unsigned FirstOpnd, NumRegs, Offset, Stride;
     if (!isNEONMultiRegOp(MI->getOpcode(), FirstOpnd, NumRegs, Offset, Stride))
       continue;
-    if (FormsRegSequence(MI, FirstOpnd, NumRegs))
+    if (llvm::ModelWithRegSequence() &&
+        FormsRegSequence(MI, FirstOpnd, NumRegs, Offset, Stride))
       continue;
 
     MachineBasicBlock::iterator NextI = llvm::next(MBBI);
@@ -390,7 +490,8 @@ bool NEONPreAllocPass::PreAllocNEONRegisters(MachineBasicBlock &MBB) {
       if (MO.isUse()) {
         // Insert a copy from VirtReg.
         TII->copyRegToReg(MBB, MBBI, MO.getReg(), VirtReg,
-                          ARM::DPRRegisterClass, ARM::DPRRegisterClass);
+                          ARM::DPRRegisterClass, ARM::DPRRegisterClass,
+                          DebugLoc());
         if (MO.isKill()) {
           MachineInstr *CopyMI = prior(MBBI);
           CopyMI->findRegisterUseOperand(VirtReg)->setIsKill();
@@ -399,7 +500,8 @@ bool NEONPreAllocPass::PreAllocNEONRegisters(MachineBasicBlock &MBB) {
       } else if (MO.isDef() && !MO.isDead()) {
         // Add a copy to VirtReg.
         TII->copyRegToReg(MBB, NextI, VirtReg, MO.getReg(),
-                          ARM::DPRRegisterClass, ARM::DPRRegisterClass);
+                          ARM::DPRRegisterClass, ARM::DPRRegisterClass,
+                          DebugLoc());
       }
     }
   }
diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp
index b10c3f7..fae84d4 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -17,6 +17,7 @@
 #include "ARMMachineFunctionInfo.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
 #include "llvm/CodeGen/PseudoSourceValue.h"
 #include "llvm/ADT/SmallVector.h"
@@ -36,10 +37,8 @@ bool Thumb1InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I,
                                    unsigned DestReg, unsigned SrcReg,
                                    const TargetRegisterClass *DestRC,
-                                   const TargetRegisterClass *SrcRC) const {
-  DebugLoc DL;
-  if (I != MBB.end()) DL = I->getDebugLoc();
-
+                                   const TargetRegisterClass *SrcRC,
+                                   DebugLoc DL) const {
   if (DestRC == ARM::GPRRegisterClass) {
     if (SrcRC == ARM::GPRRegisterClass) {
       BuildMI(MBB, I, DL, get(ARM::tMOVgpr2gpr), DestReg).addReg(SrcReg);
@@ -97,10 +96,8 @@ canFoldMemoryOperand(const MachineInstr *MI,
 void Thumb1InstrInfo::
 storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                     unsigned SrcReg, bool isKill, int FI,
-                    const TargetRegisterClass *RC) const {
-  DebugLoc DL;
-  if (I != MBB.end()) DL = I->getDebugLoc();
-
+                    const TargetRegisterClass *RC,
+                    const TargetRegisterInfo *TRI) const {
   assert((RC == ARM::tGPRRegisterClass ||
           (TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
            isARMLowRegister(SrcReg))) && "Unknown regclass!");
@@ -108,6 +105,9 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
   if (RC == ARM::tGPRRegisterClass ||
       (TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
        isARMLowRegister(SrcReg))) {
+    DebugLoc DL;
+    if (I != MBB.end()) DL = I->getDebugLoc();
+
     MachineFunction &MF = *MBB.getParent();
     MachineFrameInfo &MFI = *MF.getFrameInfo();
     MachineMemOperand *MMO =
@@ -124,10 +124,8 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
 void Thumb1InstrInfo::
 loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                      unsigned DestReg, int FI,
-                     const TargetRegisterClass *RC) const {
-  DebugLoc DL;
-  if (I != MBB.end()) DL = I->getDebugLoc();
-
+                     const TargetRegisterClass *RC,
+                     const TargetRegisterInfo *TRI) const {
   assert((RC == ARM::tGPRRegisterClass ||
           (TargetRegisterInfo::isPhysicalRegister(DestReg) &&
            isARMLowRegister(DestReg))) && "Unknown regclass!");
@@ -135,6 +133,9 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
   if (RC == ARM::tGPRRegisterClass ||
       (TargetRegisterInfo::isPhysicalRegister(DestReg) &&
        isARMLowRegister(DestReg))) {
+    DebugLoc DL;
+    if (I != MBB.end()) DL = I->getDebugLoc();
+
     MachineFunction &MF = *MBB.getParent();
     MachineFrameInfo &MFI = *MF.getFrameInfo();
     MachineMemOperand *MMO =
@@ -150,7 +151,8 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
 bool Thumb1InstrInfo::
 spillCalleeSavedRegisters(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MI,
-                          const std::vector<CalleeSavedInfo> &CSI) const {
+                          const std::vector<CalleeSavedInfo> &CSI,
+                          const TargetRegisterInfo *TRI) const {
   if (CSI.empty())
     return false;
 
@@ -161,9 +163,22 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
   AddDefaultPred(MIB);
   for (unsigned i = CSI.size(); i != 0; --i) {
     unsigned Reg = CSI[i-1].getReg();
-    // Add the callee-saved register as live-in. It's killed at the spill.
-    MBB.addLiveIn(Reg);
-    MIB.addReg(Reg, RegState::Kill);
+    bool isKill = true;
+
+    // Add the callee-saved register as live-in unless it's LR and
+    // @llvm.returnaddress is called. If LR is returned for @llvm.returnaddress
+    // then it's already added to the function and entry block live-in sets.
+    if (Reg == ARM::LR) {
+      MachineFunction &MF = *MBB.getParent();
+      if (MF.getFrameInfo()->isReturnAddressTaken() &&
+          MF.getRegInfo().isLiveIn(Reg))
+        isKill = false;
+    }
+
+    if (isKill) {
+      MBB.addLiveIn(Reg);
+      MIB.addReg(Reg, RegState::Kill);
+    }
   }
   return true;
 }
@@ -171,7 +186,8 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
 bool Thumb1InstrInfo::
 restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator MI,
-                            const std::vector<CalleeSavedInfo> &CSI) const {
+                            const std::vector<CalleeSavedInfo> &CSI,
+                            const TargetRegisterInfo *TRI) const {
   MachineFunction &MF = *MBB.getParent();
   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
   if (CSI.empty())
diff --git a/lib/Target/ARM/Thumb1InstrInfo.h b/lib/Target/ARM/Thumb1InstrInfo.h
index 516ddf1..c937296 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.h
+++ b/lib/Target/ARM/Thumb1InstrInfo.h
@@ -39,25 +39,30 @@ public:
 
   bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MI,
-                                 const std::vector<CalleeSavedInfo> &CSI) const;
+                                 const std::vector<CalleeSavedInfo> &CSI,
+                                 const TargetRegisterInfo *TRI) const;
   bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MI,
-                                   const std::vector<CalleeSavedInfo> &CSI) const;
+                                   const std::vector<CalleeSavedInfo> &CSI,
+                                   const TargetRegisterInfo *TRI) const;
 
   bool copyRegToReg(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator I,
                             unsigned DestReg, unsigned SrcReg,
                             const TargetRegisterClass *DestRC,
-                            const TargetRegisterClass *SrcRC) const;
+                            const TargetRegisterClass *SrcRC,
+                            DebugLoc DL) const;
   void storeRegToStackSlot(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MBBI,
                                    unsigned SrcReg, bool isKill, int FrameIndex,
-                                   const TargetRegisterClass *RC) const;
+                           const TargetRegisterClass *RC,
+                           const TargetRegisterInfo *TRI) const;
 
   void loadRegFromStackSlot(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MBBI,
                                     unsigned DestReg, int FrameIndex,
-                                    const TargetRegisterClass *RC) const;
+                            const TargetRegisterClass *RC,
+                            const TargetRegisterInfo *TRI) const;
 
   bool canFoldMemoryOperand(const MachineInstr *MI,
                                     const SmallVectorImpl<unsigned> &Ops) const;
diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp
index b143bd9..531d5e9 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -40,10 +40,8 @@ Thumb2InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator I,
                               unsigned DestReg, unsigned SrcReg,
                               const TargetRegisterClass *DestRC,
-                              const TargetRegisterClass *SrcRC) const {
-  DebugLoc DL;
-  if (I != MBB.end()) DL = I->getDebugLoc();
-
+                              const TargetRegisterClass *SrcRC,
+                              DebugLoc DL) const {
   if (DestRC == ARM::GPRRegisterClass) {
     if (SrcRC == ARM::GPRRegisterClass) {
       BuildMI(MBB, I, DL, get(ARM::tMOVgpr2gpr), DestReg).addReg(SrcReg);
@@ -63,17 +61,18 @@ Thumb2InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
   }
 
   // Handle SPR, DPR, and QPR copies.
-  return ARMBaseInstrInfo::copyRegToReg(MBB, I, DestReg, SrcReg, DestRC, SrcRC);
+  return ARMBaseInstrInfo::copyRegToReg(MBB, I, DestReg, SrcReg, DestRC, SrcRC, DL);
 }
 
 void Thumb2InstrInfo::
 storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                     unsigned SrcReg, bool isKill, int FI,
-                    const TargetRegisterClass *RC) const {
-  DebugLoc DL;
-  if (I != MBB.end()) DL = I->getDebugLoc();
-
+                    const TargetRegisterClass *RC,
+                    const TargetRegisterInfo *TRI) const {
   if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass) {
+    DebugLoc DL;
+    if (I != MBB.end()) DL = I->getDebugLoc();
+
     MachineFunction &MF = *MBB.getParent();
     MachineFrameInfo &MFI = *MF.getFrameInfo();
     MachineMemOperand *MMO =
@@ -87,17 +86,18 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
     return;
   }
 
-  ARMBaseInstrInfo::storeRegToStackSlot(MBB, I, SrcReg, isKill, FI, RC);
+  ARMBaseInstrInfo::storeRegToStackSlot(MBB, I, SrcReg, isKill, FI, RC, TRI);
 }
 
 void Thumb2InstrInfo::
 loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                      unsigned DestReg, int FI,
-                     const TargetRegisterClass *RC) const {
-  DebugLoc DL;
-  if (I != MBB.end()) DL = I->getDebugLoc();
-
+                     const TargetRegisterClass *RC,
+                     const TargetRegisterInfo *TRI) const {
   if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass) {
+    DebugLoc DL;
+    if (I != MBB.end()) DL = I->getDebugLoc();
+
     MachineFunction &MF = *MBB.getParent();
     MachineFrameInfo &MFI = *MF.getFrameInfo();
     MachineMemOperand *MMO =
@@ -110,7 +110,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
     return;
   }
 
-  ARMBaseInstrInfo::loadRegFromStackSlot(MBB, I, DestReg, FI, RC);
+  ARMBaseInstrInfo::loadRegFromStackSlot(MBB, I, DestReg, FI, RC, TRI);
 }
 
 void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
diff --git a/lib/Target/ARM/Thumb2InstrInfo.h b/lib/Target/ARM/Thumb2InstrInfo.h
index a0f89a6..2948770 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.h
+++ b/lib/Target/ARM/Thumb2InstrInfo.h
@@ -35,17 +35,20 @@ public:
                     MachineBasicBlock::iterator I,
                     unsigned DestReg, unsigned SrcReg,
                     const TargetRegisterClass *DestRC,
-                    const TargetRegisterClass *SrcRC) const;
+                    const TargetRegisterClass *SrcRC,
+                    DebugLoc DL) const;
 
   void storeRegToStackSlot(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI,
                            unsigned SrcReg, bool isKill, int FrameIndex,
-                           const TargetRegisterClass *RC) const;
+                           const TargetRegisterClass *RC,
+                           const TargetRegisterInfo *TRI) const;
 
   void loadRegFromStackSlot(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator MBBI,
                             unsigned DestReg, int FrameIndex,
-                            const TargetRegisterClass *RC) const;
+                            const TargetRegisterClass *RC,
+                            const TargetRegisterInfo *TRI) const;
 
   /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info.  As
   /// such, whenever a client has an instance of instruction info, it should
diff --git a/lib/Target/Alpha/AlphaInstrInfo.cpp b/lib/Target/Alpha/AlphaInstrInfo.cpp
index ba403e2..3aba363 100644
--- a/lib/Target/Alpha/AlphaInstrInfo.cpp
+++ b/lib/Target/Alpha/AlphaInstrInfo.cpp
@@ -146,16 +146,14 @@ bool AlphaInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MI,
                                   unsigned DestReg, unsigned SrcReg,
                                   const TargetRegisterClass *DestRC,
-                                  const TargetRegisterClass *SrcRC) const {
+                                  const TargetRegisterClass *SrcRC,
+                                  DebugLoc DL) const {
   //cerr << "copyRegToReg " << DestReg << " <- " << SrcReg << "\n";
   if (DestRC != SrcRC) {
     // Not yet supported!
     return false;
   }
 
-  DebugLoc DL;
-  if (MI != MBB.end()) DL = MI->getDebugLoc();
-
   if (DestRC == Alpha::GPRCRegisterClass) {
     BuildMI(MBB, MI, DL, get(Alpha::BISr), DestReg)
       .addReg(SrcReg)
@@ -180,7 +178,8 @@ void
 AlphaInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MI,
                                     unsigned SrcReg, bool isKill, int FrameIdx,
-                                    const TargetRegisterClass *RC) const {
+                                    const TargetRegisterClass *RC,
+                                    const TargetRegisterInfo *TRI) const {
   //cerr << "Trying to store " << getPrettyName(SrcReg) << " to "
   //     << FrameIdx << "\n";
   //BuildMI(MBB, MI, Alpha::WTF, 0).addReg(SrcReg);
@@ -208,7 +207,8 @@ void
 AlphaInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
                                         MachineBasicBlock::iterator MI,
                                         unsigned DestReg, int FrameIdx,
-                                        const TargetRegisterClass *RC) const {
+                                     const TargetRegisterClass *RC,
+                                     const TargetRegisterInfo *TRI) const {
   //cerr << "Trying to load " << getPrettyName(DestReg) << " to "
   //     << FrameIdx << "\n";
   DebugLoc DL;
@@ -399,7 +399,6 @@ unsigned AlphaInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
 void AlphaInstrInfo::insertNoop(MachineBasicBlock &MBB, 
                                 MachineBasicBlock::iterator MI) const {
   DebugLoc DL;
-  if (MI != MBB.end()) DL = MI->getDebugLoc();
   BuildMI(MBB, MI, DL, get(Alpha::BISr), Alpha::R31)
     .addReg(Alpha::R31)
     .addReg(Alpha::R31);
@@ -430,7 +429,8 @@ unsigned AlphaInstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
 
   GlobalBaseReg = RegInfo.createVirtualRegister(&Alpha::GPRCRegClass);
   bool Ok = TII->copyRegToReg(FirstMBB, MBBI, GlobalBaseReg, Alpha::R29,
-                              &Alpha::GPRCRegClass, &Alpha::GPRCRegClass);
+                              &Alpha::GPRCRegClass, &Alpha::GPRCRegClass,
+                              DebugLoc());
   assert(Ok && "Couldn't assign to global base register!");
   Ok = Ok; // Silence warning when assertions are turned off.
   RegInfo.addLiveIn(Alpha::R29);
@@ -457,7 +457,8 @@ unsigned AlphaInstrInfo::getGlobalRetAddr(MachineFunction *MF) const {
 
   GlobalRetAddr = RegInfo.createVirtualRegister(&Alpha::GPRCRegClass);
   bool Ok = TII->copyRegToReg(FirstMBB, MBBI, GlobalRetAddr, Alpha::R26,
-                              &Alpha::GPRCRegClass, &Alpha::GPRCRegClass);
+                              &Alpha::GPRCRegClass, &Alpha::GPRCRegClass,
+                              DebugLoc());
   assert(Ok && "Couldn't assign to global return address register!");
   Ok = Ok; // Silence warning when assertions are turned off.
   RegInfo.addLiveIn(Alpha::R26);
diff --git a/lib/Target/Alpha/AlphaInstrInfo.h b/lib/Target/Alpha/AlphaInstrInfo.h
index c3b6044..7d7365b 100644
--- a/lib/Target/Alpha/AlphaInstrInfo.h
+++ b/lib/Target/Alpha/AlphaInstrInfo.h
@@ -48,16 +48,19 @@ public:
                             MachineBasicBlock::iterator MI,
                             unsigned DestReg, unsigned SrcReg,
                             const TargetRegisterClass *DestRC,
-                            const TargetRegisterClass *SrcRC) const;
+                            const TargetRegisterClass *SrcRC,
+                            DebugLoc DL) const;
   virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MBBI,
                                    unsigned SrcReg, bool isKill, int FrameIndex,
-                                   const TargetRegisterClass *RC) const;
+                                   const TargetRegisterClass *RC,
+                                   const TargetRegisterInfo *TRI) const;
 
   virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MBBI,
                                     unsigned DestReg, int FrameIndex,
-                                    const TargetRegisterClass *RC) const;
+                                    const TargetRegisterClass *RC,
+                                    const TargetRegisterInfo *TRI) const;
   
   virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
                                               MachineInstr* MI,
diff --git a/lib/Target/Alpha/AlphaInstrInfo.td b/lib/Target/Alpha/AlphaInstrInfo.td
index d5d5e02..a47a29b 100644
--- a/lib/Target/Alpha/AlphaInstrInfo.td
+++ b/lib/Target/Alpha/AlphaInstrInfo.td
@@ -836,7 +836,7 @@ class br_fcc<bits<6> opc, string asmstr>
     !strconcat(asmstr, " $R,$dst"),  s_fbr>;
 
 let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in {
-let Ra = 31 in
+let Ra = 31, isBarrier = 1 in
 def BR : BFormD<0x30, "br $$31,$DISP", [(br bb:$DISP)], s_ubr>;
 
 def COND_BRANCH_I : BFormN<0, (ins u64imm:$opc, GPRC:$R, target:$dst), 
diff --git a/lib/Target/Alpha/AlphaSelectionDAGInfo.cpp b/lib/Target/Alpha/AlphaSelectionDAGInfo.cpp
index 0eb7b8f..f1958fe 100644
--- a/lib/Target/Alpha/AlphaSelectionDAGInfo.cpp
+++ b/lib/Target/Alpha/AlphaSelectionDAGInfo.cpp
@@ -12,10 +12,11 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "alpha-selectiondag-info"
-#include "AlphaSelectionDAGInfo.h"
+#include "AlphaTargetMachine.h"
 using namespace llvm;
 
-AlphaSelectionDAGInfo::AlphaSelectionDAGInfo() {
+AlphaSelectionDAGInfo::AlphaSelectionDAGInfo(const AlphaTargetMachine &TM)
+  : TargetSelectionDAGInfo(TM) {
 }
 
 AlphaSelectionDAGInfo::~AlphaSelectionDAGInfo() {
diff --git a/lib/Target/Alpha/AlphaSelectionDAGInfo.h b/lib/Target/Alpha/AlphaSelectionDAGInfo.h
index 70889ae..3405cc0 100644
--- a/lib/Target/Alpha/AlphaSelectionDAGInfo.h
+++ b/lib/Target/Alpha/AlphaSelectionDAGInfo.h
@@ -18,9 +18,11 @@
 
 namespace llvm {
 
+class AlphaTargetMachine;
+
 class AlphaSelectionDAGInfo : public TargetSelectionDAGInfo {
 public:
-  AlphaSelectionDAGInfo();
+  explicit AlphaSelectionDAGInfo(const AlphaTargetMachine &TM);
   ~AlphaSelectionDAGInfo();
 };
 
diff --git a/lib/Target/Alpha/AlphaTargetMachine.cpp b/lib/Target/Alpha/AlphaTargetMachine.cpp
index 5169a01..fc9be03 100644
--- a/lib/Target/Alpha/AlphaTargetMachine.cpp
+++ b/lib/Target/Alpha/AlphaTargetMachine.cpp
@@ -32,7 +32,8 @@ AlphaTargetMachine::AlphaTargetMachine(const Target &T, const std::string &TT,
     FrameInfo(TargetFrameInfo::StackGrowsDown, 16, 0),
     JITInfo(*this),
     Subtarget(TT, FS),
-    TLInfo(*this) {
+    TLInfo(*this),
+    TSInfo(*this) {
   setRelocationModel(Reloc::PIC_);
 }
 
diff --git a/lib/Target/Alpha/AlphaTargetMachine.h b/lib/Target/Alpha/AlphaTargetMachine.h
index 0990f6d..153944e 100644
--- a/lib/Target/Alpha/AlphaTargetMachine.h
+++ b/lib/Target/Alpha/AlphaTargetMachine.h
@@ -20,6 +20,7 @@
 #include "AlphaInstrInfo.h"
 #include "AlphaJITInfo.h"
 #include "AlphaISelLowering.h"
+#include "AlphaSelectionDAGInfo.h"
 #include "AlphaSubtarget.h"
 
 namespace llvm {
@@ -33,6 +34,7 @@ class AlphaTargetMachine : public LLVMTargetMachine {
   AlphaJITInfo JITInfo;
   AlphaSubtarget Subtarget;
   AlphaTargetLowering TLInfo;
+  AlphaSelectionDAGInfo TSInfo;
 
 public:
   AlphaTargetMachine(const Target &T, const std::string &TT,
@@ -47,6 +49,9 @@ public:
   virtual const AlphaTargetLowering* getTargetLowering() const {
     return &TLInfo;
   }
+  virtual const AlphaSelectionDAGInfo* getSelectionDAGInfo() const {
+    return &TSInfo;
+  }
   virtual const TargetData       *getTargetData() const { return &DataLayout; }
   virtual AlphaJITInfo* getJITInfo() {
     return &JITInfo;
diff --git a/lib/Target/Blackfin/BlackfinInstrInfo.cpp b/lib/Target/Blackfin/BlackfinInstrInfo.cpp
index cf1901b..73924b7 100644
--- a/lib/Target/Blackfin/BlackfinInstrInfo.cpp
+++ b/lib/Target/Blackfin/BlackfinInstrInfo.cpp
@@ -138,9 +138,8 @@ bool BlackfinInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
                                      unsigned DestReg,
                                      unsigned SrcReg,
                                      const TargetRegisterClass *DestRC,
-                                     const TargetRegisterClass *SrcRC) const {
-  DebugLoc DL;
-
+                                     const TargetRegisterClass *SrcRC,
+                                     DebugLoc DL) const {
   if (inClass(BF::ALLRegClass, DestReg, DestRC) &&
       inClass(BF::ALLRegClass, SrcReg,  SrcRC)) {
     BuildMI(MBB, I, DL, get(BF::MOVE), DestReg).addReg(SrcReg);
@@ -196,7 +195,8 @@ BlackfinInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
                                        unsigned SrcReg,
                                        bool isKill,
                                        int FI,
-                                       const TargetRegisterClass *RC) const {
+                                       const TargetRegisterClass *RC,
+                                       const TargetRegisterInfo *TRI) const {
   DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
 
   if (inClass(BF::DPRegClass, SrcReg, RC)) {
@@ -242,7 +242,8 @@ BlackfinInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
                                         MachineBasicBlock::iterator I,
                                         unsigned DestReg,
                                         int FI,
-                                        const TargetRegisterClass *RC) const {
+                                        const TargetRegisterClass *RC,
+                                        const TargetRegisterInfo *TRI) const {
   DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
   if (inClass(BF::DPRegClass, DestReg, RC)) {
     BuildMI(MBB, I, DL, get(BF::LOAD32fi), DestReg)
diff --git a/lib/Target/Blackfin/BlackfinInstrInfo.h b/lib/Target/Blackfin/BlackfinInstrInfo.h
index ea3429c..c1dcd58 100644
--- a/lib/Target/Blackfin/BlackfinInstrInfo.h
+++ b/lib/Target/Blackfin/BlackfinInstrInfo.h
@@ -50,13 +50,15 @@ namespace llvm {
                               MachineBasicBlock::iterator I,
                               unsigned DestReg, unsigned SrcReg,
                               const TargetRegisterClass *DestRC,
-                              const TargetRegisterClass *SrcRC) const;
+                              const TargetRegisterClass *SrcRC,
+                              DebugLoc DL) const;
 
     virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator MBBI,
                                      unsigned SrcReg, bool isKill,
                                      int FrameIndex,
-                                     const TargetRegisterClass *RC) const;
+                                     const TargetRegisterClass *RC,
+                                     const TargetRegisterInfo *TRI) const;
 
     virtual void storeRegToAddr(MachineFunction &MF,
                                 unsigned SrcReg, bool isKill,
@@ -67,7 +69,8 @@ namespace llvm {
     virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator MBBI,
                                       unsigned DestReg, int FrameIndex,
-                                      const TargetRegisterClass *RC) const;
+                                      const TargetRegisterClass *RC,
+                                      const TargetRegisterInfo *TRI) const;
 
     virtual void loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
                                  SmallVectorImpl<MachineOperand> &Addr,
diff --git a/lib/Target/Blackfin/BlackfinInstrInfo.td b/lib/Target/Blackfin/BlackfinInstrInfo.td
index 2471688..5cf350a 100644
--- a/lib/Target/Blackfin/BlackfinInstrInfo.td
+++ b/lib/Target/Blackfin/BlackfinInstrInfo.td
@@ -301,9 +301,9 @@ def LOAD32p_8z: F1<(outs D:$dst), (ins P:$ptr),
 
 def : Pat<(i32 (extloadi8 P:$ptr)), (LOAD32p_8z P:$ptr)>;
 def : Pat<(i16 (extloadi8 P:$ptr)),
-          (EXTRACT_SUBREG (LOAD32p_8z P:$ptr), bfin_subreg_lo16)>;
+          (EXTRACT_SUBREG (LOAD32p_8z P:$ptr), lo16)>;
 def : Pat<(i16 (zextloadi8 P:$ptr)),
-          (EXTRACT_SUBREG (LOAD32p_8z P:$ptr), bfin_subreg_lo16)>;
+          (EXTRACT_SUBREG (LOAD32p_8z P:$ptr), lo16)>;
 
 def LOAD32p_imm16_8z: F1<(outs D:$dst), (ins P:$ptr, i32imm:$off),
                          "$dst = b[$ptr + $off] (z);",
@@ -313,17 +313,17 @@ def : Pat<(i32 (extloadi8 (add P:$ptr, imm16:$off))),
           (LOAD32p_imm16_8z P:$ptr, imm:$off)>;
 def : Pat<(i16 (extloadi8 (add P:$ptr, imm16:$off))),
           (EXTRACT_SUBREG (LOAD32p_imm16_8z P:$ptr, imm:$off),
-                           bfin_subreg_lo16)>;
+                           lo16)>;
 def : Pat<(i16 (zextloadi8 (add P:$ptr, imm16:$off))),
           (EXTRACT_SUBREG (LOAD32p_imm16_8z P:$ptr, imm:$off),
-                           bfin_subreg_lo16)>;
+                           lo16)>;
 
 def LOAD32p_8s: F1<(outs D:$dst), (ins P:$ptr),
                    "$dst = b[$ptr] (x);",
                    [(set D:$dst, (sextloadi8 P:$ptr))]>;
 
 def : Pat<(i16 (sextloadi8 P:$ptr)),
-          (EXTRACT_SUBREG (LOAD32p_8s P:$ptr), bfin_subreg_lo16)>;
+          (EXTRACT_SUBREG (LOAD32p_8s P:$ptr), lo16)>;
 
 def LOAD32p_imm16_8s: F1<(outs D:$dst), (ins P:$ptr, i32imm:$off),
                          "$dst = b[$ptr + $off] (x);",
@@ -331,7 +331,7 @@ def LOAD32p_imm16_8s: F1<(outs D:$dst), (ins P:$ptr, i32imm:$off),
 
 def : Pat<(i16 (sextloadi8 (add P:$ptr, imm16:$off))),
           (EXTRACT_SUBREG (LOAD32p_imm16_8s P:$ptr, imm:$off),
-                           bfin_subreg_lo16)>;
+                           lo16)>;
 // Memory loads without patterns
 
 let mayLoad = 1 in {
@@ -468,16 +468,16 @@ def STORE32i_post: F1<(outs I:$ptr_wb), (ins D:$val, I:$ptr, M:$off),
 
 def : Pat<(truncstorei16 D:$val, PI:$ptr),
           (STORE16pi (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS D:$val, D)),
-                                     bfin_subreg_lo16), PI:$ptr)>;
+                                     lo16), PI:$ptr)>;
 
 def : Pat<(truncstorei16 (srl D:$val, (i16 16)), PI:$ptr),
           (STORE16pi (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS D:$val, D)),
-                                     bfin_subreg_hi16), PI:$ptr)>;
+                                     hi16), PI:$ptr)>;
 
 def : Pat<(truncstorei8 D16L:$val, P:$ptr),
           (STORE8p (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
                                   (i16 (COPY_TO_REGCLASS D16L:$val, D16L)),
-                                  bfin_subreg_lo16),
+                                  lo16),
                    P:$ptr)>;
 
 //===----------------------------------------------------------------------===//
@@ -516,19 +516,19 @@ def : Pat<(sext_inreg D16L:$src, i8),
           (EXTRACT_SUBREG (MOVEsext8
                            (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
                                           D16L:$src,
-                                          bfin_subreg_lo16)),
-                          bfin_subreg_lo16)>;
+                                          lo16)),
+                          lo16)>;
 
 def : Pat<(sext_inreg D:$src, i16),
-          (MOVEsext (EXTRACT_SUBREG D:$src, bfin_subreg_lo16))>;
+          (MOVEsext (EXTRACT_SUBREG D:$src, lo16))>;
 
 def : Pat<(and D:$src, 0xffff),
-          (MOVEzext (EXTRACT_SUBREG D:$src, bfin_subreg_lo16))>;
+          (MOVEzext (EXTRACT_SUBREG D:$src, lo16))>;
 
 def : Pat<(i32 (anyext D16L:$src)),
           (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
                          (i16 (COPY_TO_REGCLASS D16L:$src, D16L)),
-                         bfin_subreg_lo16)>;
+                         lo16)>;
 
 // TODO Dreg = Dreg_byte (X/Z)
 
@@ -859,4 +859,4 @@ def : Pat<(BfinCall (i32 tglobaladdr:$dst)),
 def : Pat<(BfinCall (i32 texternalsym:$dst)),
           (CALLa texternalsym:$dst)>;
 def : Pat<(i16 (trunc D:$src)),
-          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS D:$src, D)), bfin_subreg_lo16)>;
+          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS D:$src, D)), lo16)>;
diff --git a/lib/Target/Blackfin/BlackfinRegisterInfo.cpp b/lib/Target/Blackfin/BlackfinRegisterInfo.cpp
index 2512c9b..5153ace 100644
--- a/lib/Target/Blackfin/BlackfinRegisterInfo.cpp
+++ b/lib/Target/Blackfin/BlackfinRegisterInfo.cpp
@@ -111,7 +111,7 @@ BlackfinRegisterInfo::getPhysicalRegisterRegClass(unsigned reg, EVT VT) const {
 bool BlackfinRegisterInfo::hasFP(const MachineFunction &MF) const {
   const MachineFrameInfo *MFI = MF.getFrameInfo();
   return DisableFramePointerElim(MF) ||
-    MFI->hasCalls() || MFI->hasVarSizedObjects();
+    MFI->adjustsStack() || MFI->hasVarSizedObjects();
 }
 
 bool BlackfinRegisterInfo::
@@ -177,11 +177,11 @@ void BlackfinRegisterInfo::loadConstant(MachineBasicBlock &MBB,
 
   // We must split into halves
   BuildMI(MBB, I, DL,
-          TII.get(BF::LOAD16i), getSubReg(Reg, bfin_subreg_hi16))
+          TII.get(BF::LOAD16i), getSubReg(Reg, BF::hi16))
     .addImm((value >> 16) & 0xffff)
     .addReg(Reg, RegState::ImplicitDefine);
   BuildMI(MBB, I, DL,
-          TII.get(BF::LOAD16i), getSubReg(Reg, bfin_subreg_lo16))
+          TII.get(BF::LOAD16i), getSubReg(Reg, BF::lo16))
     .addImm(value & 0xffff)
     .addReg(Reg, RegState::ImplicitKill)
     .addReg(Reg, RegState::ImplicitDefine);
@@ -394,7 +394,7 @@ void BlackfinRegisterInfo::emitPrologue(MachineFunction &MF) const {
   }
 
   if (!hasFP(MF)) {
-    assert(!MFI->hasCalls() &&
+    assert(!MFI->adjustsStack() &&
            "FP elimination on a non-leaf function is not supported");
     adjustRegister(MBB, MBBI, dl, BF::SP, BF::P1, -FrameSize);
     return;
@@ -435,7 +435,7 @@ void BlackfinRegisterInfo::emitEpilogue(MachineFunction &MF,
   assert(FrameSize%4 == 0 && "Misaligned frame size");
 
   if (!hasFP(MF)) {
-    assert(!MFI->hasCalls() &&
+    assert(!MFI->adjustsStack() &&
            "FP elimination on a non-leaf function is not supported");
     adjustRegister(MBB, MBBI, dl, BF::SP, BF::P1, FrameSize);
     return;
diff --git a/lib/Target/Blackfin/BlackfinRegisterInfo.h b/lib/Target/Blackfin/BlackfinRegisterInfo.h
index 7cfb120..03c5450 100644
--- a/lib/Target/Blackfin/BlackfinRegisterInfo.h
+++ b/lib/Target/Blackfin/BlackfinRegisterInfo.h
@@ -24,13 +24,6 @@ namespace llvm {
   class TargetInstrInfo;
   class Type;
 
-  // Subregister indices, keep in sync with BlackfinRegisterInfo.td
-  enum BfinSubregIdx {
-    bfin_subreg_lo16 = 1,
-    bfin_subreg_hi16 = 2,
-    bfin_subreg_lo32 = 3
-  };
-
   struct BlackfinRegisterInfo : public BlackfinGenRegisterInfo {
     BlackfinSubtarget &Subtarget;
     const TargetInstrInfo &TII;
diff --git a/lib/Target/Blackfin/BlackfinRegisterInfo.td b/lib/Target/Blackfin/BlackfinRegisterInfo.td
index d396cc8..e1cfae9 100644
--- a/lib/Target/Blackfin/BlackfinRegisterInfo.td
+++ b/lib/Target/Blackfin/BlackfinRegisterInfo.td
@@ -11,8 +11,18 @@
 //  Declarations that describe the Blackfin register file
 //===----------------------------------------------------------------------===//
 
-// Registers are identified with 3-bit group and 3-bit ID numbers.
+// Subregs are:
+// 1: .L
+// 2: .H
+// 3: .W (32 low bits of 40-bit accu)
+let Namespace = "BF" in {
+def lo16 : SubRegIndex;
+def hi16 : SubRegIndex;
+def lo32 : SubRegIndex;
+def hi32 : SubRegIndex;
+}
 
+// Registers are identified with 3-bit group and 3-bit ID numbers.
 class BlackfinReg<string n> : Register<n> {
   field bits<3> Group;
   field bits<3> Num;
@@ -40,6 +50,7 @@ class Ri<bits<3> group, bits<3> num, string n> : BlackfinReg<n> {
 // Ra 40-bit accumulator registers
 class Ra<bits<3> num, string n, list<Register> subs> : BlackfinReg<n> {
   let SubRegs = subs;
+  let SubRegIndices = [hi32, lo32];
   let Group = 4;
   let Num = num;
 }
@@ -54,6 +65,7 @@ multiclass Rss<bits<3> group, bits<3> num, string n> {
 class Rii<bits<3> group, bits<3> num, string n, list<Register> subs>
       : BlackfinReg<n> {
   let SubRegs = subs;
+  let SubRegIndices = [hi16, lo16];
   let Group = group;
   let Num = num;
 }
@@ -164,7 +176,7 @@ def RETN : Ri<7, 5, "retn">,  DwarfRegNum<[38]>;
 def RETE : Ri<7, 6, "rete">,  DwarfRegNum<[39]>;
 
 def ASTAT   : Ri<4, 6, "astat">,   DwarfRegNum<[40]> {
-  let SubRegs = [AZ, AN, CC, NCC, AQ, AC0, AC1, AV0, AV0S, AV1, AV1S, V, VS];
+  let Aliases = [AZ, AN, CC, NCC, AQ, AC0, AC1, AV0, AV0S, AV1, AV1S, V, VS];
 }
 
 def SEQSTAT : Ri<7, 1, "seqstat">, DwarfRegNum<[41]>;
@@ -182,38 +194,6 @@ def LC1 : Ri<6, 3, "lc1">, DwarfRegNum<[47]>;
 def LB0 : Ri<6, 2, "lb0">, DwarfRegNum<[48]>;
 def LB1 : Ri<6, 5, "lb1">, DwarfRegNum<[49]>;
 
-// Subregs are:
-// 1: .L
-// 2: .H
-// 3: .W (32 low bits of 40-bit accu)
-// Keep in sync with enum in BlackfinRegisterInfo.h
-def bfin_subreg_lo16  : PatLeaf<(i32 1)>;
-def bfin_subreg_hi16  : PatLeaf<(i32 2)>;
-def bfin_subreg_32bit : PatLeaf<(i32 3)>;
-
-def : SubRegSet<1,
-    [R0,  R1,  R2,  R3,  R4,  R5,  R6,  R7,
-     P0,  P1,  P2,  P3,  P4,  P5,  SP,  FP,
-     I0,  I1,  I2,  I3,  M0,  M1,  M2,  M3,
-     B0,  B1,  B2,  B3,  L0,  L1,  L2,  L3],
-    [R0L, R1L, R2L, R3L, R4L, R5L, R6L, R7L,
-     P0L, P1L, P2L, P3L, P4L, P5L, SPL, FPL,
-     I0L, I1L, I2L, I3L, M0L, M1L, M2L, M3L,
-     B0L, B1L, B2L, B3L, L0L, L1L, L2L, L3L]>;
-
-def : SubRegSet<2,
-    [R0,  R1,  R2,  R3,  R4,  R5,  R6,  R7,
-     P0,  P1,  P2,  P3,  P4,  P5,  SP,  FP,
-     I0,  I1,  I2,  I3,  M0,  M1,  M2,  M3,
-     B0,  B1,  B2,  B3,  L0,  L1,  L2,  L3],
-    [R0H, R1H, R2H, R3H, R4H, R5H, R6H, R7H,
-     P0H, P1H, P2H, P3H, P4H, P5H, SPH, FPH,
-     I0H, I1H, I2H, I3H, M0H, M1H, M2H, M3H,
-     B0H, B1H, B2H, B3H, L0H, L1H, L2H, L3H]>;
-
-def : SubRegSet<1, [A0, A0W, A1, A1W], [A0L, A0L, A1L, A1L]>;
-def : SubRegSet<2, [A0, A0W, A1, A1W], [A0H, A0H, A1H, A1H]>;
-
 // Register classes.
 def D16 : RegisterClass<"BF", [i16], 16,
     [R0H, R0L, R1H, R1L, R2H, R2L, R3H, R3L,
@@ -260,11 +240,11 @@ def GR16 : RegisterClass<"BF", [i16], 16,
      L0H, L0L, L1H, L1L, L2H, L2L, L3H, L3L]>;
 
 def D : RegisterClass<"BF", [i32], 32, [R0, R1, R2, R3, R4, R5, R6, R7]> {
-  let SubRegClassList = [D16L, D16H];
+  let SubRegClasses = [(D16L lo16), (D16H hi16)];
 }
 
 def P : RegisterClass<"BF", [i32], 32, [P0, P1, P2, P3, P4, P5, FP, SP]> {
-  let SubRegClassList = [P16L, P16H];
+  let SubRegClasses = [(P16L lo16), (P16H hi16)];
   let MethodProtos = [{
     iterator allocation_order_end(const MachineFunction &MF) const;
   }];
@@ -287,7 +267,7 @@ def L : RegisterClass<"BF", [i32], 32, [L0, L1, L2, L3]>;
 def DP : RegisterClass<"BF", [i32], 32,
     [R0, R1, R2, R3, R4, R5, R6, R7,
      P0, P1, P2, P3, P4, P5, FP, SP]> {
-  let SubRegClassList = [DP16L, DP16H];
+  let SubRegClasses = [(DP16L lo16), (DP16H hi16)];
   let MethodProtos = [{
     iterator allocation_order_end(const MachineFunction &MF) const;
   }];
diff --git a/lib/Target/Blackfin/BlackfinSelectionDAGInfo.cpp b/lib/Target/Blackfin/BlackfinSelectionDAGInfo.cpp
index f4bb25f..a21f696 100644
--- a/lib/Target/Blackfin/BlackfinSelectionDAGInfo.cpp
+++ b/lib/Target/Blackfin/BlackfinSelectionDAGInfo.cpp
@@ -12,10 +12,12 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "blackfin-selectiondag-info"
-#include "BlackfinSelectionDAGInfo.h"
+#include "BlackfinTargetMachine.h"
 using namespace llvm;
 
-BlackfinSelectionDAGInfo::BlackfinSelectionDAGInfo() {
+BlackfinSelectionDAGInfo::BlackfinSelectionDAGInfo(
+                                              const BlackfinTargetMachine &TM)
+  : TargetSelectionDAGInfo(TM) {
 }
 
 BlackfinSelectionDAGInfo::~BlackfinSelectionDAGInfo() {
diff --git a/lib/Target/Blackfin/BlackfinSelectionDAGInfo.h b/lib/Target/Blackfin/BlackfinSelectionDAGInfo.h
index a620330..f1ce348 100644
--- a/lib/Target/Blackfin/BlackfinSelectionDAGInfo.h
+++ b/lib/Target/Blackfin/BlackfinSelectionDAGInfo.h
@@ -18,9 +18,11 @@
 
 namespace llvm {
 
+class BlackfinTargetMachine;
+
 class BlackfinSelectionDAGInfo : public TargetSelectionDAGInfo {
 public:
-  BlackfinSelectionDAGInfo();
+  explicit BlackfinSelectionDAGInfo(const BlackfinTargetMachine &TM);
   ~BlackfinSelectionDAGInfo();
 };
 
diff --git a/lib/Target/Blackfin/BlackfinTargetMachine.cpp b/lib/Target/Blackfin/BlackfinTargetMachine.cpp
index 45d7c35..66a2f68 100644
--- a/lib/Target/Blackfin/BlackfinTargetMachine.cpp
+++ b/lib/Target/Blackfin/BlackfinTargetMachine.cpp
@@ -31,6 +31,7 @@ BlackfinTargetMachine::BlackfinTargetMachine(const Target &T,
     DataLayout("e-p:32:32-i64:32-f64:32-n32"),
     Subtarget(TT, FS),
     TLInfo(*this),
+    TSInfo(*this),
     InstrInfo(Subtarget),
     FrameInfo(TargetFrameInfo::StackGrowsDown, 4, 0) {
 }
diff --git a/lib/Target/Blackfin/BlackfinTargetMachine.h b/lib/Target/Blackfin/BlackfinTargetMachine.h
index 07e7394..a63aa54 100644
--- a/lib/Target/Blackfin/BlackfinTargetMachine.h
+++ b/lib/Target/Blackfin/BlackfinTargetMachine.h
@@ -20,6 +20,7 @@
 #include "BlackfinInstrInfo.h"
 #include "BlackfinSubtarget.h"
 #include "BlackfinISelLowering.h"
+#include "BlackfinSelectionDAGInfo.h"
 #include "BlackfinIntrinsicInfo.h"
 
 namespace llvm {
@@ -28,6 +29,7 @@ namespace llvm {
     const TargetData DataLayout;
     BlackfinSubtarget Subtarget;
     BlackfinTargetLowering TLInfo;
+    BlackfinSelectionDAGInfo TSInfo;
     BlackfinInstrInfo InstrInfo;
     TargetFrameInfo FrameInfo;
     BlackfinIntrinsicInfo IntrinsicInfo;
@@ -46,6 +48,9 @@ namespace llvm {
     virtual const BlackfinTargetLowering* getTargetLowering() const {
       return &TLInfo;
     }
+    virtual const BlackfinSelectionDAGInfo* getSelectionDAGInfo() const {
+      return &TSInfo;
+    }
     virtual const TargetData *getTargetData() const { return &DataLayout; }
     virtual bool addInstSelector(PassManagerBase &PM,
                                  CodeGenOpt::Level OptLevel);
diff --git a/lib/Target/CBackend/CBackend.cpp b/lib/Target/CBackend/CBackend.cpp
index 67f513b..55b8aaa 100644
--- a/lib/Target/CBackend/CBackend.cpp
+++ b/lib/Target/CBackend/CBackend.cpp
@@ -2165,6 +2165,9 @@ void CWriter::printFunctionSignature(const Function *F, bool Prototype) {
    case CallingConv::X86_FastCall:
     Out << "__attribute__((fastcall)) ";
     break;
+   case CallingConv::X86_ThisCall:
+    Out << "__attribute__((thiscall)) ";
+    break;
    default:
     break;
   }
@@ -3554,11 +3557,11 @@ void CWriter::visitExtractValueInst(ExtractValueInst &EVI) {
 //                       External Interface declaration
 //===----------------------------------------------------------------------===//
 
-bool CTargetMachine::addPassesToEmitWholeFile(PassManager &PM,
-                                              formatted_raw_ostream &o,
-                                              CodeGenFileType FileType,
-                                              CodeGenOpt::Level OptLevel,
-                                              bool DisableVerify) {
+bool CTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
+                                         formatted_raw_ostream &o,
+                                         CodeGenFileType FileType,
+                                         CodeGenOpt::Level OptLevel,
+                                         bool DisableVerify) {
   if (FileType != TargetMachine::CGFT_AssemblyFile) return true;
 
   PM.add(createGCLoweringPass());
diff --git a/lib/Target/CBackend/CTargetMachine.h b/lib/Target/CBackend/CTargetMachine.h
index d178e7f..6fed195 100644
--- a/lib/Target/CBackend/CTargetMachine.h
+++ b/lib/Target/CBackend/CTargetMachine.h
@@ -23,12 +23,11 @@ struct CTargetMachine : public TargetMachine {
   CTargetMachine(const Target &T, const std::string &TT, const std::string &FS)
     : TargetMachine(T) {}
 
-  virtual bool WantsWholeFile() const { return true; }
-  virtual bool addPassesToEmitWholeFile(PassManager &PM,
-                                        formatted_raw_ostream &Out,
-                                        CodeGenFileType FileType,
-                                        CodeGenOpt::Level OptLevel,
-                                        bool DisableVerify);
+  virtual bool addPassesToEmitFile(PassManagerBase &PM,
+                                   formatted_raw_ostream &Out,
+                                   CodeGenFileType FileType,
+                                   CodeGenOpt::Level OptLevel,
+                                   bool DisableVerify);
   
   virtual const TargetData *getTargetData() const { return 0; }
 };
diff --git a/lib/Target/CellSPU/README.txt b/lib/Target/CellSPU/README.txt
index 4783dd5..0e7ad35 100644
--- a/lib/Target/CellSPU/README.txt
+++ b/lib/Target/CellSPU/README.txt
@@ -10,6 +10,8 @@ Department in The Aerospace Corporation:
 - Chandler Carruth (LLVM expertise)
 - Nehal Desai (debugging, i32 operations, RoadRunner SPU expertise)
 
+Some minor fixes added by Kalle Raiskila.
+
 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR
diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp
index 5e04454..081e8d0 100644
--- a/lib/Target/CellSPU/SPUISelLowering.cpp
+++ b/lib/Target/CellSPU/SPUISelLowering.cpp
@@ -485,7 +485,7 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
   // Set pre-RA register scheduler default to BURR, which produces slightly
   // better code than the default (could also be TDRR, but TargetLowering.h
   // needs a mod to support that model):
-  setSchedulingPreference(SchedulingForRegPressure);
+  setSchedulingPreference(Sched::RegPressure);
 }
 
 const char *
diff --git a/lib/Target/CellSPU/SPUInstrInfo.cpp b/lib/Target/CellSPU/SPUInstrInfo.cpp
index 423da3b..4c53c98 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.cpp
+++ b/lib/Target/CellSPU/SPUInstrInfo.cpp
@@ -255,16 +255,14 @@ bool SPUInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MI,
                                    unsigned DestReg, unsigned SrcReg,
                                    const TargetRegisterClass *DestRC,
-                                   const TargetRegisterClass *SrcRC) const
+                                   const TargetRegisterClass *SrcRC,
+                                   DebugLoc DL) const
 {
   // We support cross register class moves for our aliases, such as R3 in any
   // reg class to any other reg class containing R3.  This is required because
   // we instruction select bitconvert i64 -> f64 as a noop for example, so our
   // types have no specific meaning.
 
-  DebugLoc DL;
-  if (MI != MBB.end()) DL = MI->getDebugLoc();
-
   if (DestRC == SPU::R8CRegisterClass) {
     BuildMI(MBB, MI, DL, get(SPU::LRr8), DestReg).addReg(SrcReg);
   } else if (DestRC == SPU::R16CRegisterClass) {
@@ -291,9 +289,10 @@ bool SPUInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
 
 void
 SPUInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
-                                     MachineBasicBlock::iterator MI,
-                                     unsigned SrcReg, bool isKill, int FrameIdx,
-                                     const TargetRegisterClass *RC) const
+                                  MachineBasicBlock::iterator MI,
+                                  unsigned SrcReg, bool isKill, int FrameIdx,
+                                  const TargetRegisterClass *RC,
+                                  const TargetRegisterInfo *TRI) const
 {
   unsigned opc;
   bool isValidFrameIdx = (FrameIdx < SPUFrameInfo::maxFrameOffset());
@@ -325,9 +324,10 @@ SPUInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
 
 void
 SPUInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
-                                        MachineBasicBlock::iterator MI,
-                                        unsigned DestReg, int FrameIdx,
-                                        const TargetRegisterClass *RC) const
+                                   MachineBasicBlock::iterator MI,
+                                   unsigned DestReg, int FrameIdx,
+                                   const TargetRegisterClass *RC,
+                                   const TargetRegisterInfo *TRI) const
 {
   unsigned opc;
   bool isValidFrameIdx = (FrameIdx < SPUFrameInfo::maxFrameOffset());
@@ -467,6 +467,9 @@ SPUInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
   // If there is only one terminator instruction, process it.
   if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
     if (isUncondBranch(LastInst)) {
+      // Check for jump tables
+      if (!LastInst->getOperand(0).isMBB())
+        return true;
       TBB = LastInst->getOperand(0).getMBB();
       return false;
     } else if (isCondBranch(LastInst)) {
diff --git a/lib/Target/CellSPU/SPUInstrInfo.h b/lib/Target/CellSPU/SPUInstrInfo.h
index 42677fc..6dabd7c 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.h
+++ b/lib/Target/CellSPU/SPUInstrInfo.h
@@ -60,19 +60,22 @@ namespace llvm {
                               MachineBasicBlock::iterator MI,
                               unsigned DestReg, unsigned SrcReg,
                               const TargetRegisterClass *DestRC,
-                              const TargetRegisterClass *SrcRC) const;
+                              const TargetRegisterClass *SrcRC,
+                              DebugLoc DL) const;
 
     //! Store a register to a stack slot, based on its register class.
     virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator MBBI,
                                      unsigned SrcReg, bool isKill, int FrameIndex,
-                                     const TargetRegisterClass *RC) const;
+                                     const TargetRegisterClass *RC,
+                                     const TargetRegisterInfo *TRI) const;
 
     //! Load a register from a stack slot, based on its register class.
     virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator MBBI,
                                       unsigned DestReg, int FrameIndex,
-                                      const TargetRegisterClass *RC) const;
+                                      const TargetRegisterClass *RC,
+                                      const TargetRegisterInfo *TRI) const;
 
     //! Return true if the specified load or store can be folded
     virtual
diff --git a/lib/Target/CellSPU/SPUInstrInfo.td b/lib/Target/CellSPU/SPUInstrInfo.td
index 6d1f87d..a7fb14c 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.td
+++ b/lib/Target/CellSPU/SPUInstrInfo.td
@@ -655,7 +655,7 @@ def SFHvec:
 def SFHr16:
     RRForm<0b00010010000, (outs R16C:$rT), (ins R16C:$rA, R16C:$rB),
       "sfh\t$rT, $rA, $rB", IntegerOp,
-      [(set R16C:$rT, (sub R16C:$rA, R16C:$rB))]>;
+      [(set R16C:$rT, (sub R16C:$rB, R16C:$rA))]>;
 
 def SFHIvec:
     RI10Form<0b10110000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
@@ -670,11 +670,11 @@ def SFHIr16 : RI10Form<0b10110000, (outs R16C:$rT), (ins R16C:$rA, s10imm:$val),
 def SFvec : RRForm<0b00000010000, (outs VECREG:$rT),
                                   (ins VECREG:$rA, VECREG:$rB),
   "sf\t$rT, $rA, $rB", IntegerOp,
-  [(set (v4i32 VECREG:$rT), (sub (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
+  [(set (v4i32 VECREG:$rT), (sub (v4i32 VECREG:$rB), (v4i32 VECREG:$rA)))]>;
 
 def SFr32 : RRForm<0b00000010000, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
   "sf\t$rT, $rA, $rB", IntegerOp,
-  [(set R32C:$rT, (sub R32C:$rA, R32C:$rB))]>;
+  [(set R32C:$rT, (sub R32C:$rB, R32C:$rA))]>;
 
 def SFIvec:
     RI10Form<0b00110000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
diff --git a/lib/Target/CellSPU/SPURegisterInfo.cpp b/lib/Target/CellSPU/SPURegisterInfo.cpp
index fdbe10f..d8937ec 100644
--- a/lib/Target/CellSPU/SPURegisterInfo.cpp
+++ b/lib/Target/CellSPU/SPURegisterInfo.cpp
@@ -469,7 +469,7 @@ void SPURegisterInfo::emitPrologue(MachineFunction &MF) const
          && "SPURegisterInfo::emitPrologue: FrameSize not aligned");
 
   // the "empty" frame size is 16 - just the register scavenger spill slot
-  if (FrameSize > 16 || MFI->hasCalls()) {
+  if (FrameSize > 16 || MFI->adjustsStack()) {
     FrameSize = -(FrameSize + SPUFrameInfo::minStackSize());
     if (hasDebugInfo) {
       // Mark effective beginning of when frame pointer becomes valid.
@@ -569,7 +569,7 @@ SPURegisterInfo::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const
          && "SPURegisterInfo::emitEpilogue: FrameSize not aligned");
 
   // the "empty" frame size is 16 - just the register scavenger spill slot
-  if (FrameSize > 16 || MFI->hasCalls()) {
+  if (FrameSize > 16 || MFI->adjustsStack()) {
     FrameSize = FrameSize + SPUFrameInfo::minStackSize();
     if (isInt<10>(FrameSize + LinkSlotOffset)) {
       // Reload $lr, adjust $sp by required amount
diff --git a/lib/Target/CellSPU/SPUSelectionDAGInfo.cpp b/lib/Target/CellSPU/SPUSelectionDAGInfo.cpp
index ca2a4bf..5732fd4 100644
--- a/lib/Target/CellSPU/SPUSelectionDAGInfo.cpp
+++ b/lib/Target/CellSPU/SPUSelectionDAGInfo.cpp
@@ -12,10 +12,11 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "cellspu-selectiondag-info"
-#include "SPUSelectionDAGInfo.h"
+#include "SPUTargetMachine.h"
 using namespace llvm;
 
-SPUSelectionDAGInfo::SPUSelectionDAGInfo() {
+SPUSelectionDAGInfo::SPUSelectionDAGInfo(const SPUTargetMachine &TM)
+  : TargetSelectionDAGInfo(TM) {
 }
 
 SPUSelectionDAGInfo::~SPUSelectionDAGInfo() {
diff --git a/lib/Target/CellSPU/SPUSelectionDAGInfo.h b/lib/Target/CellSPU/SPUSelectionDAGInfo.h
index 0a6b4c1..39257d9 100644
--- a/lib/Target/CellSPU/SPUSelectionDAGInfo.h
+++ b/lib/Target/CellSPU/SPUSelectionDAGInfo.h
@@ -18,9 +18,11 @@
 
 namespace llvm {
 
+class SPUTargetMachine;
+
 class SPUSelectionDAGInfo : public TargetSelectionDAGInfo {
 public:
-  SPUSelectionDAGInfo();
+  explicit SPUSelectionDAGInfo(const SPUTargetMachine &TM);
   ~SPUSelectionDAGInfo();
 };
 
diff --git a/lib/Target/CellSPU/SPUTargetMachine.cpp b/lib/Target/CellSPU/SPUTargetMachine.cpp
index 6500067..480ec3f 100644
--- a/lib/Target/CellSPU/SPUTargetMachine.cpp
+++ b/lib/Target/CellSPU/SPUTargetMachine.cpp
@@ -42,6 +42,7 @@ SPUTargetMachine::SPUTargetMachine(const Target &T, const std::string &TT,
     InstrInfo(*this),
     FrameInfo(*this),
     TLInfo(*this),
+    TSInfo(*this),
     InstrItins(Subtarget.getInstrItineraryData()) {
   // For the time being, use static relocations, since there's really no
   // support for PIC yet.
diff --git a/lib/Target/CellSPU/SPUTargetMachine.h b/lib/Target/CellSPU/SPUTargetMachine.h
index 37e7cd2..7e02701 100644
--- a/lib/Target/CellSPU/SPUTargetMachine.h
+++ b/lib/Target/CellSPU/SPUTargetMachine.h
@@ -17,6 +17,7 @@
 #include "SPUSubtarget.h"
 #include "SPUInstrInfo.h"
 #include "SPUISelLowering.h"
+#include "SPUSelectionDAGInfo.h"
 #include "SPUFrameInfo.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetData.h"
@@ -34,6 +35,7 @@ class SPUTargetMachine : public LLVMTargetMachine {
   SPUInstrInfo        InstrInfo;
   SPUFrameInfo        FrameInfo;
   SPUTargetLowering   TLInfo;
+  SPUSelectionDAGInfo TSInfo;
   InstrItineraryData  InstrItins;
 public:
   SPUTargetMachine(const Target &T, const std::string &TT,
@@ -61,6 +63,10 @@ public:
    return &TLInfo;
   }
 
+  virtual const SPUSelectionDAGInfo* getSelectionDAGInfo() const {
+    return &TSInfo;
+  }
+
   virtual const SPURegisterInfo *getRegisterInfo() const {
     return &InstrInfo.getRegisterInfo();
   }
diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp
index e739b26..45a0c84 100644
--- a/lib/Target/CppBackend/CPPBackend.cpp
+++ b/lib/Target/CppBackend/CPPBackend.cpp
@@ -1038,6 +1038,11 @@ namespace {
       Out << ");";
       nl(Out);
     }
+    if (GV->isThreadLocal()) {
+      printCppName(GV);
+      Out << "->setThreadLocal(true);";
+      nl(Out);
+    }
     if (is_inline) {
       out(); Out << "}"; nl(Out);
     }
@@ -2007,11 +2012,11 @@ char CppWriter::ID = 0;
 //                       External Interface declaration
 //===----------------------------------------------------------------------===//
 
-bool CPPTargetMachine::addPassesToEmitWholeFile(PassManager &PM,
-                                                formatted_raw_ostream &o,
-                                                CodeGenFileType FileType,
-                                                CodeGenOpt::Level OptLevel,
-                                                bool DisableVerify) {
+bool CPPTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
+                                           formatted_raw_ostream &o,
+                                           CodeGenFileType FileType,
+                                           CodeGenOpt::Level OptLevel,
+                                           bool DisableVerify) {
   if (FileType != TargetMachine::CGFT_AssemblyFile) return true;
   PM.add(new CppWriter(o));
   return false;
diff --git a/lib/Target/CppBackend/CPPTargetMachine.h b/lib/Target/CppBackend/CPPTargetMachine.h
index b7aae91..e42166e 100644
--- a/lib/Target/CppBackend/CPPTargetMachine.h
+++ b/lib/Target/CppBackend/CPPTargetMachine.h
@@ -26,12 +26,11 @@ struct CPPTargetMachine : public TargetMachine {
                    const std::string &FS)
     : TargetMachine(T) {}
 
-  virtual bool WantsWholeFile() const { return true; }
-  virtual bool addPassesToEmitWholeFile(PassManager &PM,
-                                        formatted_raw_ostream &Out,
-                                        CodeGenFileType FileType,
-                                        CodeGenOpt::Level OptLevel,
-                                        bool DisableVerify);
+  virtual bool addPassesToEmitFile(PassManagerBase &PM,
+                                   formatted_raw_ostream &Out,
+                                   CodeGenFileType FileType,
+                                   CodeGenOpt::Level OptLevel,
+                                   bool DisableVerify);
 
   virtual const TargetData *getTargetData() const { return 0; }
 };
diff --git a/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp b/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp
index 04dfb0a..e42e9b3 100644
--- a/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp
+++ b/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp
@@ -155,7 +155,7 @@ void MBlazeAsmPrinter::printSavedRegsBitmask(raw_ostream &O) {
     CPUBitmask |= (1 << MBlazeRegisterInfo::
                 getRegisterNumbering(RI.getFrameRegister(*MF)));
 
-  if (MFI->hasCalls())
+  if (MFI->adjustsStack())
     CPUBitmask |= (1 << MBlazeRegisterInfo::
                 getRegisterNumbering(RI.getRARegister()));
 
diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.cpp b/lib/Target/MBlaze/MBlazeInstrInfo.cpp
index 01f3174..4c4d86b 100644
--- a/lib/Target/MBlaze/MBlazeInstrInfo.cpp
+++ b/lib/Target/MBlaze/MBlazeInstrInfo.cpp
@@ -107,7 +107,6 @@ isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const {
 void MBlazeInstrInfo::
 insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const {
   DebugLoc DL;
-  if (MI != MBB.end()) DL = MI->getDebugLoc();
   BuildMI(MBB, MI, DL, get(MBlaze::NOP));
 }
 
@@ -115,8 +114,8 @@ bool MBlazeInstrInfo::
 copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
              unsigned DestReg, unsigned SrcReg,
              const TargetRegisterClass *DestRC,
-             const TargetRegisterClass *SrcRC) const {
-  DebugLoc DL;
+             const TargetRegisterClass *SrcRC,
+             DebugLoc DL) const {
   llvm::BuildMI(MBB, I, DL, get(MBlaze::ADD), DestReg)
       .addReg(SrcReg).addReg(MBlaze::R0);
   return true;
@@ -125,7 +124,8 @@ copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
 void MBlazeInstrInfo::
 storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                     unsigned SrcReg, bool isKill, int FI,
-                    const TargetRegisterClass *RC) const {
+                    const TargetRegisterClass *RC,
+                    const TargetRegisterInfo *TRI) const {
   DebugLoc DL;
   BuildMI(MBB, I, DL, get(MBlaze::SWI)).addReg(SrcReg,getKillRegState(isKill))
     .addImm(0).addFrameIndex(FI);
@@ -134,7 +134,8 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
 void MBlazeInstrInfo::
 loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                      unsigned DestReg, int FI,
-                     const TargetRegisterClass *RC) const {
+                     const TargetRegisterClass *RC,
+                     const TargetRegisterInfo *TRI) const {
   DebugLoc DL;
   BuildMI(MBB, I, DL, get(MBlaze::LWI), DestReg)
       .addImm(0).addFrameIndex(FI);
@@ -210,7 +211,8 @@ unsigned MBlazeInstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
   GlobalBaseReg = RegInfo.createVirtualRegister(MBlaze::CPURegsRegisterClass);
   bool Ok = TII->copyRegToReg(FirstMBB, MBBI, GlobalBaseReg, MBlaze::R20,
                               MBlaze::CPURegsRegisterClass,
-                              MBlaze::CPURegsRegisterClass);
+                              MBlaze::CPURegsRegisterClass,
+                              DebugLoc());
   assert(Ok && "Couldn't assign to global base register!");
   Ok = Ok; // Silence warning when assertions are turned off.
   RegInfo.addLiveIn(MBlaze::R20);
diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.h b/lib/Target/MBlaze/MBlazeInstrInfo.h
index 4f79f1c..c9fdc88 100644
--- a/lib/Target/MBlaze/MBlazeInstrInfo.h
+++ b/lib/Target/MBlaze/MBlazeInstrInfo.h
@@ -203,16 +203,19 @@ public:
                             MachineBasicBlock::iterator I,
                             unsigned DestReg, unsigned SrcReg,
                             const TargetRegisterClass *DestRC,
-                            const TargetRegisterClass *SrcRC) const;
+                            const TargetRegisterClass *SrcRC,
+                            DebugLoc DL) const;
   virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MBBI,
                                    unsigned SrcReg, bool isKill, int FrameIndex,
-                                   const TargetRegisterClass *RC) const;
+                                   const TargetRegisterClass *RC,
+                                   const TargetRegisterInfo *TRI) const;
 
   virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MBBI,
                                     unsigned DestReg, int FrameIndex,
-                                    const TargetRegisterClass *RC) const;
+                                    const TargetRegisterClass *RC,
+                                    const TargetRegisterInfo *TRI) const;
 
   virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
                                               MachineInstr* MI,
diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
index e15176e..f15eea9 100644
--- a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
+++ b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
@@ -220,7 +220,7 @@ void MBlazeRegisterInfo::adjustMBlazeStackFrame(MachineFunction &MF) const {
     StackOffset += RegSize;
   }
 
-  if (MFI->hasCalls()) {
+  if (MFI->adjustsStack()) {
     MBlazeFI->setRAStackOffset(0);
     MFI->setObjectOffset(MFI->CreateStackObject(RegSize, RegSize, true),
                          StackOffset);
@@ -311,8 +311,8 @@ emitPrologue(MachineFunction &MF) const {
   unsigned StackSize = MFI->getStackSize();
 
   // No need to allocate space on the stack.
-  if (StackSize == 0 && !MFI->hasCalls()) return;
-  if (StackSize < 28 && MFI->hasCalls()) StackSize = 28;
+  if (StackSize == 0 && !MFI->adjustsStack()) return;
+  if (StackSize < 28 && MFI->adjustsStack()) StackSize = 28;
 
   int FPOffset = MBlazeFI->getFPStackOffset();
   int RAOffset = MBlazeFI->getRAStackOffset();
@@ -323,7 +323,7 @@ emitPrologue(MachineFunction &MF) const {
 
   // Save the return address only if the function isnt a leaf one.
   // swi  R15, R1, stack_loc
-  if (MFI->hasCalls()) {
+  if (MFI->adjustsStack()) {
     BuildMI(MBB, MBBI, DL, TII.get(MBlaze::SWI))
         .addReg(MBlaze::R15).addImm(RAOffset).addReg(MBlaze::R1);
   }
@@ -366,14 +366,14 @@ emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const {
 
   // Restore the return address only if the function isnt a leaf one.
   // lwi R15, R1, stack_loc
-  if (MFI->hasCalls()) {
+  if (MFI->adjustsStack()) {
     BuildMI(MBB, MBBI, dl, TII.get(MBlaze::LWI), MBlaze::R15)
       .addImm(RAOffset).addReg(MBlaze::R1);
   }
 
   // Get the number of bytes from FrameInfo
   int StackSize = (int) MFI->getStackSize();
-  if (StackSize < 28 && MFI->hasCalls()) StackSize = 28;
+  if (StackSize < 28 && MFI->adjustsStack()) StackSize = 28;
 
   // adjust stack.
   // addi R1, R1, imm
diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.td b/lib/Target/MBlaze/MBlazeRegisterInfo.td
index 96a5c98..d0a1e75 100644
--- a/lib/Target/MBlaze/MBlazeRegisterInfo.td
+++ b/lib/Target/MBlaze/MBlazeRegisterInfo.td
@@ -17,21 +17,15 @@ class MBlazeReg<string n> : Register<n> {
   let Namespace = "MBlaze";
 }
 
-class MBlazeRegWithSubRegs<string n, list<Register> subregs>
-  : RegisterWithSubRegs<n, subregs> {
-  field bits<5> Num;
-  let Namespace = "MBlaze";
-}
-
 // MBlaze CPU Registers
 class MBlazeGPRReg<bits<5> num, string n> : MBlazeReg<n> {
   let Num = num;
 }
 
 // MBlaze 32-bit (aliased) FPU Registers
-class FPR<bits<5> num, string n, list<Register> subregs>
-  : MBlazeRegWithSubRegs<n, subregs> {
+class FPR<bits<5> num, string n, list<Register> aliases> : MBlazeReg<n> {
   let Num = num;
+  let Aliases = aliases;
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/MBlaze/MBlazeSelectionDAGInfo.cpp b/lib/Target/MBlaze/MBlazeSelectionDAGInfo.cpp
index 105e42a..6a115b2 100644
--- a/lib/Target/MBlaze/MBlazeSelectionDAGInfo.cpp
+++ b/lib/Target/MBlaze/MBlazeSelectionDAGInfo.cpp
@@ -12,10 +12,11 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "mblaze-selectiondag-info"
-#include "MBlazeSelectionDAGInfo.h"
+#include "MBlazeTargetMachine.h"
 using namespace llvm;
 
-MBlazeSelectionDAGInfo::MBlazeSelectionDAGInfo() {
+MBlazeSelectionDAGInfo::MBlazeSelectionDAGInfo(const MBlazeTargetMachine &TM)
+  : TargetSelectionDAGInfo(TM) {
 }
 
 MBlazeSelectionDAGInfo::~MBlazeSelectionDAGInfo() {
diff --git a/lib/Target/MBlaze/MBlazeSelectionDAGInfo.h b/lib/Target/MBlaze/MBlazeSelectionDAGInfo.h
index 11e6879..9f8e2aa 100644
--- a/lib/Target/MBlaze/MBlazeSelectionDAGInfo.h
+++ b/lib/Target/MBlaze/MBlazeSelectionDAGInfo.h
@@ -18,9 +18,11 @@
 
 namespace llvm {
 
+class MBlazeTargetMachine;
+
 class MBlazeSelectionDAGInfo : public TargetSelectionDAGInfo {
 public:
-  MBlazeSelectionDAGInfo();
+  explicit MBlazeSelectionDAGInfo(const MBlazeTargetMachine &TM);
   ~MBlazeSelectionDAGInfo();
 };
 
diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.cpp b/lib/Target/MBlaze/MBlazeTargetMachine.cpp
index 9eba2b3..4252953 100644
--- a/lib/Target/MBlaze/MBlazeTargetMachine.cpp
+++ b/lib/Target/MBlaze/MBlazeTargetMachine.cpp
@@ -39,7 +39,7 @@ MBlazeTargetMachine(const Target &T, const std::string &TT,
              "f64:32:32-v64:32:32-v128:32:32-n32"),
   InstrInfo(*this),
   FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0),
-  TLInfo(*this) {
+  TLInfo(*this), TSInfo(*this) {
   if (getRelocationModel() == Reloc::Default) {
       setRelocationModel(Reloc::Static);
   }
diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.h b/lib/Target/MBlaze/MBlazeTargetMachine.h
index 9bf9898..6a57e58 100644
--- a/lib/Target/MBlaze/MBlazeTargetMachine.h
+++ b/lib/Target/MBlaze/MBlazeTargetMachine.h
@@ -17,6 +17,7 @@
 #include "MBlazeSubtarget.h"
 #include "MBlazeInstrInfo.h"
 #include "MBlazeISelLowering.h"
+#include "MBlazeSelectionDAGInfo.h"
 #include "MBlazeIntrinsicInfo.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetData.h"
@@ -31,6 +32,7 @@ namespace llvm {
     MBlazeInstrInfo       InstrInfo;
     TargetFrameInfo     FrameInfo;
     MBlazeTargetLowering  TLInfo;
+    MBlazeSelectionDAGInfo TSInfo;
     MBlazeIntrinsicInfo IntrinsicInfo;
   public:
     MBlazeTargetMachine(const Target &T, const std::string &TT,
@@ -54,6 +56,9 @@ namespace llvm {
     virtual const MBlazeTargetLowering *getTargetLowering() const
     { return &TLInfo; }
 
+    virtual const MBlazeSelectionDAGInfo* getSelectionDAGInfo() const
+    { return &TSInfo; }
+
     const TargetIntrinsicInfo *getIntrinsicInfo() const
     { return &IntrinsicInfo; }
 
diff --git a/lib/Target/MSIL/MSILWriter.cpp b/lib/Target/MSIL/MSILWriter.cpp
index 15d16ec..3de173c 100644
--- a/lib/Target/MSIL/MSILWriter.cpp
+++ b/lib/Target/MSIL/MSILWriter.cpp
@@ -34,12 +34,11 @@ namespace llvm {
     MSILTarget(const Target &T, const std::string &TT, const std::string &FS)
       : TargetMachine(T) {}
 
-    virtual bool WantsWholeFile() const { return true; }
-    virtual bool addPassesToEmitWholeFile(PassManager &PM,
-                                          formatted_raw_ostream &Out,
-                                          CodeGenFileType FileType,
-                                          CodeGenOpt::Level OptLevel,
-                                          bool DisableVerify);
+    virtual bool addPassesToEmitFile(PassManagerBase &PM,
+                                     formatted_raw_ostream &Out,
+                                     CodeGenFileType FileType,
+                                     CodeGenOpt::Level OptLevel,
+                                     bool DisableVerify);
 
     virtual const TargetData *getTargetData() const { return 0; }
   };
@@ -279,6 +278,8 @@ std::string MSILWriter::getConvModopt(CallingConv::ID CallingConvID) {
     return "modopt([mscorlib]System.Runtime.CompilerServices.CallConvFastcall) ";
   case CallingConv::X86_StdCall:
     return "modopt([mscorlib]System.Runtime.CompilerServices.CallConvStdcall) ";
+  case CallingConv::X86_ThisCall:
+    return "modopt([mscorlib]System.Runtime.CompilerServices.CallConvThiscall) ";
   default:
     errs() << "CallingConvID = " << CallingConvID << '\n';
     llvm_unreachable("Unsupported calling convention");
@@ -1686,11 +1687,11 @@ void MSILWriter::printExternals() {
 //                      External Interface declaration
 //===----------------------------------------------------------------------===//
 
-bool MSILTarget::addPassesToEmitWholeFile(PassManager &PM,
-                                          formatted_raw_ostream &o,
-                                          CodeGenFileType FileType,
-                                          CodeGenOpt::Level OptLevel,
-                                          bool DisableVerify)
+bool MSILTarget::addPassesToEmitFile(PassManagerBase &PM,
+                                     formatted_raw_ostream &o,
+                                     CodeGenFileType FileType,
+                                     CodeGenOpt::Level OptLevel,
+                                     bool DisableVerify)
 {
   if (FileType != TargetMachine::CGFT_AssemblyFile) return true;
   MSILWriter* Writer = new MSILWriter(o);
diff --git a/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.h b/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.h
index f9620e8..e937696 100644
--- a/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.h
+++ b/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.h
@@ -26,7 +26,7 @@ namespace llvm {
 
   /// MSP430MCInstLower - This class is used to lower an MachineInstr
   /// into an MCInst.
-class VISIBILITY_HIDDEN MSP430MCInstLower {
+class LLVM_LIBRARY_VISIBILITY MSP430MCInstLower {
   MCContext &Ctx;
   Mangler &Mang;
 
diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp
index c3e2bdf7..403400e 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.cpp
+++ b/lib/Target/MSP430/MSP430ISelLowering.cpp
@@ -83,7 +83,7 @@ MSP430TargetLowering::MSP430TargetLowering(MSP430TargetMachine &tm) :
 
   setStackPointerRegisterToSaveRestore(MSP430::SPW);
   setBooleanContents(ZeroOrOneBooleanContent);
-  setSchedulingPreference(SchedulingForLatency);
+  setSchedulingPreference(Sched::Latency);
 
   // We have post-incremented loads / stores.
   setIndexedLoadAction(ISD::POST_INC, MVT::i8, Legal);
@@ -897,6 +897,9 @@ MSP430TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
 
 SDValue MSP430TargetLowering::LowerRETURNADDR(SDValue Op,
                                               SelectionDAG &DAG) const {
+  MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+  MFI->setReturnAddressIsTaken(true);
+
   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
   DebugLoc dl = Op.getDebugLoc();
 
@@ -920,6 +923,7 @@ SDValue MSP430TargetLowering::LowerFRAMEADDR(SDValue Op,
                                              SelectionDAG &DAG) const {
   MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
   MFI->setFrameAddressIsTaken(true);
+
   EVT VT = Op.getValueType();
   DebugLoc dl = Op.getDebugLoc();  // FIXME probably not meaningful
   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
diff --git a/lib/Target/MSP430/MSP430InstrInfo.cpp b/lib/Target/MSP430/MSP430InstrInfo.cpp
index 2b09b3d..18226ab 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.cpp
+++ b/lib/Target/MSP430/MSP430InstrInfo.cpp
@@ -32,7 +32,8 @@ MSP430InstrInfo::MSP430InstrInfo(MSP430TargetMachine &tm)
 void MSP430InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
                                           MachineBasicBlock::iterator MI,
                                     unsigned SrcReg, bool isKill, int FrameIdx,
-                                    const TargetRegisterClass *RC) const {
+                                          const TargetRegisterClass *RC,
+                                          const TargetRegisterInfo *TRI) const {
   DebugLoc DL;
   if (MI != MBB.end()) DL = MI->getDebugLoc();
   MachineFunction &MF = *MBB.getParent();
@@ -59,7 +60,8 @@ void MSP430InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
 void MSP430InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
                                            MachineBasicBlock::iterator MI,
                                            unsigned DestReg, int FrameIdx,
-                                           const TargetRegisterClass *RC) const{
+                                           const TargetRegisterClass *RC,
+                                           const TargetRegisterInfo *TRI) const{
   DebugLoc DL;
   if (MI != MBB.end()) DL = MI->getDebugLoc();
   MachineFunction &MF = *MBB.getParent();
@@ -85,10 +87,8 @@ bool MSP430InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I,
                                    unsigned DestReg, unsigned SrcReg,
                                    const TargetRegisterClass *DestRC,
-                                   const TargetRegisterClass *SrcRC) const {
-  DebugLoc DL;
-  if (I != MBB.end()) DL = I->getDebugLoc();
-
+                                   const TargetRegisterClass *SrcRC,
+                                   DebugLoc DL) const {
   if (DestRC == SrcRC) {
     unsigned Opc;
     if (DestRC == &MSP430::GR16RegClass) {
@@ -130,7 +130,8 @@ MSP430InstrInfo::isMoveInstr(const MachineInstr& MI,
 bool
 MSP430InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
                                            MachineBasicBlock::iterator MI,
-                                const std::vector<CalleeSavedInfo> &CSI) const {
+                                        const std::vector<CalleeSavedInfo> &CSI,
+                                          const TargetRegisterInfo *TRI) const {
   if (CSI.empty())
     return false;
 
@@ -154,7 +155,8 @@ MSP430InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
 bool
 MSP430InstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
                                              MachineBasicBlock::iterator MI,
-                                const std::vector<CalleeSavedInfo> &CSI) const {
+                                        const std::vector<CalleeSavedInfo> &CSI,
+                                          const TargetRegisterInfo *TRI) const {
   if (CSI.empty())
     return false;
 
diff --git a/lib/Target/MSP430/MSP430InstrInfo.h b/lib/Target/MSP430/MSP430InstrInfo.h
index 6ef4b0a..842b4cb 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.h
+++ b/lib/Target/MSP430/MSP430InstrInfo.h
@@ -52,7 +52,8 @@ public:
   bool copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                     unsigned DestReg, unsigned SrcReg,
                     const TargetRegisterClass *DestRC,
-                    const TargetRegisterClass *SrcRC) const;
+                    const TargetRegisterClass *SrcRC,
+                    DebugLoc DL) const;
 
   bool isMoveInstr(const MachineInstr& MI,
                    unsigned &SrcReg, unsigned &DstReg,
@@ -62,18 +63,22 @@ public:
                                    MachineBasicBlock::iterator MI,
                                    unsigned SrcReg, bool isKill,
                                    int FrameIndex,
-                                   const TargetRegisterClass *RC) const;
+                                   const TargetRegisterClass *RC,
+                                   const TargetRegisterInfo *TRI) const;
   virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MI,
                                     unsigned DestReg, int FrameIdx,
-                                    const TargetRegisterClass *RC) const;
+                                    const TargetRegisterClass *RC,
+                                    const TargetRegisterInfo *TRI) const;
 
   virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator MI,
-                                 const std::vector<CalleeSavedInfo> &CSI) const;
+                                        const std::vector<CalleeSavedInfo> &CSI,
+                                         const TargetRegisterInfo *TRI) const;
   virtual bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
                                            MachineBasicBlock::iterator MI,
-                                 const std::vector<CalleeSavedInfo> &CSI) const;
+                                        const std::vector<CalleeSavedInfo> &CSI,
+                                           const TargetRegisterInfo *TRI) const;
 
   unsigned GetInstSizeInBytes(const MachineInstr *MI) const;
 
diff --git a/lib/Target/MSP430/MSP430RegisterInfo.td b/lib/Target/MSP430/MSP430RegisterInfo.td
index 4078626..f8aec66 100644
--- a/lib/Target/MSP430/MSP430RegisterInfo.td
+++ b/lib/Target/MSP430/MSP430RegisterInfo.td
@@ -43,6 +43,9 @@ def R13B : MSP430Reg<13, "r13">;
 def R14B : MSP430Reg<14, "r14">;
 def R15B : MSP430Reg<15, "r15">;
 
+def subreg_8bit : SubRegIndex { let Namespace = "MSP430"; }
+
+let SubRegIndices = [subreg_8bit] in {
 def PCW  : MSP430RegWithSubregs<0,  "r0",  [PCB]>;
 def SPW  : MSP430RegWithSubregs<1,  "r1",  [SPB]>;
 def SRW  : MSP430RegWithSubregs<2,  "r2",  [SRB]>;
@@ -59,13 +62,7 @@ def R12W : MSP430RegWithSubregs<12, "r12", [R12B]>;
 def R13W : MSP430RegWithSubregs<13, "r13", [R13B]>;
 def R14W : MSP430RegWithSubregs<14, "r14", [R14B]>;
 def R15W : MSP430RegWithSubregs<15, "r15", [R15B]>;
-
-def : SubRegSet<1, [PCW, SPW, SRW, CGW, FPW,
-                    R5W, R6W, R7W, R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W],
-                   [PCB, SPB, SRB, CGB, FPB,
-                    R5B, R6B, R7B, R8B, R9B, R10B, R11B, R12B, R13B, R14B, R15B]>;
-
-def subreg_8bit : PatLeaf<(i32 1)>;
+}
 
 def GR8 : RegisterClass<"MSP430", [i8], 8,
    // Volatile registers
@@ -101,7 +98,7 @@ def GR16 : RegisterClass<"MSP430", [i16], 16,
    // Volatile, but not allocable
    PCW, SPW, SRW, CGW]>
 {
-  let SubRegClassList = [GR8];
+  let SubRegClasses = [(GR8 subreg_8bit)];
   let MethodProtos = [{
     iterator allocation_order_end(const MachineFunction &MF) const;
   }];
diff --git a/lib/Target/MSP430/MSP430SelectionDAGInfo.cpp b/lib/Target/MSP430/MSP430SelectionDAGInfo.cpp
index a54c929..24f45fa 100644
--- a/lib/Target/MSP430/MSP430SelectionDAGInfo.cpp
+++ b/lib/Target/MSP430/MSP430SelectionDAGInfo.cpp
@@ -12,10 +12,11 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "msp430-selectiondag-info"
-#include "MSP430SelectionDAGInfo.h"
+#include "MSP430TargetMachine.h"
 using namespace llvm;
 
-MSP430SelectionDAGInfo::MSP430SelectionDAGInfo() {
+MSP430SelectionDAGInfo::MSP430SelectionDAGInfo(const MSP430TargetMachine &TM)
+  : TargetSelectionDAGInfo(TM) {
 }
 
 MSP430SelectionDAGInfo::~MSP430SelectionDAGInfo() {
diff --git a/lib/Target/MSP430/MSP430SelectionDAGInfo.h b/lib/Target/MSP430/MSP430SelectionDAGInfo.h
index c952ab7..fa81948 100644
--- a/lib/Target/MSP430/MSP430SelectionDAGInfo.h
+++ b/lib/Target/MSP430/MSP430SelectionDAGInfo.h
@@ -18,9 +18,11 @@
 
 namespace llvm {
 
+class MSP430TargetMachine;
+
 class MSP430SelectionDAGInfo : public TargetSelectionDAGInfo {
 public:
-  MSP430SelectionDAGInfo();
+  explicit MSP430SelectionDAGInfo(const MSP430TargetMachine &TM);
   ~MSP430SelectionDAGInfo();
 };
 
diff --git a/lib/Target/MSP430/MSP430TargetMachine.cpp b/lib/Target/MSP430/MSP430TargetMachine.cpp
index a0dbac2..99877c8 100644
--- a/lib/Target/MSP430/MSP430TargetMachine.cpp
+++ b/lib/Target/MSP430/MSP430TargetMachine.cpp
@@ -33,7 +33,7 @@ MSP430TargetMachine::MSP430TargetMachine(const Target &T,
   Subtarget(TT, FS),
   // FIXME: Check TargetData string.
   DataLayout("e-p:16:16:16-i8:8:8-i16:16:16-i32:16:32-n8:16"),
-  InstrInfo(*this), TLInfo(*this),
+  InstrInfo(*this), TLInfo(*this), TSInfo(*this),
   FrameInfo(TargetFrameInfo::StackGrowsDown, 2, -2) { }
 
 
diff --git a/lib/Target/MSP430/MSP430TargetMachine.h b/lib/Target/MSP430/MSP430TargetMachine.h
index 68bde9a..b93edfd 100644
--- a/lib/Target/MSP430/MSP430TargetMachine.h
+++ b/lib/Target/MSP430/MSP430TargetMachine.h
@@ -17,6 +17,7 @@
 
 #include "MSP430InstrInfo.h"
 #include "MSP430ISelLowering.h"
+#include "MSP430SelectionDAGInfo.h"
 #include "MSP430RegisterInfo.h"
 #include "MSP430Subtarget.h"
 #include "llvm/Target/TargetData.h"
@@ -32,6 +33,7 @@ class MSP430TargetMachine : public LLVMTargetMachine {
   const TargetData       DataLayout;       // Calculates type size & alignment
   MSP430InstrInfo        InstrInfo;
   MSP430TargetLowering   TLInfo;
+  MSP430SelectionDAGInfo TSInfo;
 
   // MSP430 does not have any call stack frame, therefore not having
   // any MSP430 specific FrameInfo class.
@@ -54,6 +56,10 @@ public:
     return &TLInfo;
   }
 
+  virtual const MSP430SelectionDAGInfo* getSelectionDAGInfo() const {
+    return &TSInfo;
+  }
+
   virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
   virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
 }; // MSP430TargetMachine.
diff --git a/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp b/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp
index d269153..4d7fe4c 100644
--- a/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp
+++ b/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp
@@ -145,7 +145,7 @@ void MipsAsmPrinter::printSavedRegsBitmask(raw_ostream &O) {
     CPUBitmask |= (1 << MipsRegisterInfo::
                 getRegisterNumbering(RI.getFrameRegister(*MF)));
   
-  if (MFI->hasCalls()) 
+  if (MFI->adjustsStack()) 
     CPUBitmask |= (1 << MipsRegisterInfo::
                 getRegisterNumbering(RI.getRARegister()));
 
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp
index ee85a3f3..3888bbf 100644
--- a/lib/Target/Mips/MipsISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp
@@ -225,12 +225,12 @@ SDNode *MipsDAGToDAGISel::SelectLoadFp64(SDNode *N) {
                                     MVT::Other, Offset0, Base, Chain);
   SDValue Undef = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
                                                  dl, NVT), 0);
-  SDValue I0 = CurDAG->getTargetInsertSubreg(Mips::SUBREG_FPEVEN, dl, 
+  SDValue I0 = CurDAG->getTargetInsertSubreg(Mips::sub_fpeven, dl, 
                             MVT::f64, Undef, SDValue(LD0, 0));
 
   SDNode *LD1 = CurDAG->getMachineNode(Mips::LWC1, dl, MVT::f32,
                           MVT::Other, Offset1, Base, SDValue(LD0, 1));
-  SDValue I1 = CurDAG->getTargetInsertSubreg(Mips::SUBREG_FPODD, dl, 
+  SDValue I1 = CurDAG->getTargetInsertSubreg(Mips::sub_fpodd, dl, 
                             MVT::f64, I0, SDValue(LD1, 0));
 
   ReplaceUses(SDValue(N, 0), I1);
@@ -266,9 +266,9 @@ SDNode *MipsDAGToDAGISel::SelectStoreFp64(SDNode *N) {
   DebugLoc dl = N->getDebugLoc();
 
   // Get the even and odd part from the f64 register
-  SDValue FPOdd = CurDAG->getTargetExtractSubreg(Mips::SUBREG_FPODD, 
+  SDValue FPOdd = CurDAG->getTargetExtractSubreg(Mips::sub_fpodd, 
                                                  dl, MVT::f32, N1);
-  SDValue FPEven = CurDAG->getTargetExtractSubreg(Mips::SUBREG_FPEVEN,
+  SDValue FPEven = CurDAG->getTargetExtractSubreg(Mips::sub_fpeven,
                                                  dl, MVT::f32, N1);
 
   // The second store should start after for 4 bytes. 
@@ -438,9 +438,9 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
         SDValue Undef = SDValue(
           CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::f64), 0);
         SDNode *MTC = CurDAG->getMachineNode(Mips::MTC1, dl, MVT::f32, Zero);
-        SDValue I0 = CurDAG->getTargetInsertSubreg(Mips::SUBREG_FPEVEN, dl, 
+        SDValue I0 = CurDAG->getTargetInsertSubreg(Mips::sub_fpeven, dl, 
                             MVT::f64, Undef, SDValue(MTC, 0));
-        SDValue I1 = CurDAG->getTargetInsertSubreg(Mips::SUBREG_FPODD, dl, 
+        SDValue I1 = CurDAG->getTargetInsertSubreg(Mips::sub_fpodd, dl, 
                             MVT::f64, I0, SDValue(MTC, 0));
         ReplaceUses(SDValue(Node, 0), I1);
         return I1.getNode();
diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp
index dbd3c24..4005e35 100644
--- a/lib/Target/Mips/MipsInstrInfo.cpp
+++ b/lib/Target/Mips/MipsInstrInfo.cpp
@@ -124,7 +124,6 @@ void MipsInstrInfo::
 insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const 
 {
   DebugLoc DL;
-  if (MI != MBB.end()) DL = MI->getDebugLoc();
   BuildMI(MBB, MI, DL, get(Mips::NOP));
 }
 
@@ -132,10 +131,8 @@ bool MipsInstrInfo::
 copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
              unsigned DestReg, unsigned SrcReg,
              const TargetRegisterClass *DestRC,
-             const TargetRegisterClass *SrcRC) const {
-  DebugLoc DL;
-  
-  if (I != MBB.end()) DL = I->getDebugLoc();
+             const TargetRegisterClass *SrcRC,
+             DebugLoc DL) const {
 
   if (DestRC != SrcRC) {
 
@@ -190,7 +187,8 @@ copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
 void MipsInstrInfo::
 storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                     unsigned SrcReg, bool isKill, int FI, 
-                    const TargetRegisterClass *RC) const {
+                    const TargetRegisterClass *RC,
+                    const TargetRegisterInfo *TRI) const {
   DebugLoc DL;
   if (I != MBB.end()) DL = I->getDebugLoc();
 
@@ -223,7 +221,8 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
 void MipsInstrInfo::
 loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                      unsigned DestReg, int FI,
-                     const TargetRegisterClass *RC) const 
+                     const TargetRegisterClass *RC,
+                     const TargetRegisterInfo *TRI) const 
 {
   DebugLoc DL;
   if (I != MBB.end()) DL = I->getDebugLoc();
@@ -624,7 +623,8 @@ unsigned MipsInstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
   GlobalBaseReg = RegInfo.createVirtualRegister(Mips::CPURegsRegisterClass);
   bool Ok = TII->copyRegToReg(FirstMBB, MBBI, GlobalBaseReg, Mips::GP,
                               Mips::CPURegsRegisterClass,
-                              Mips::CPURegsRegisterClass);
+                              Mips::CPURegsRegisterClass,
+                              DebugLoc());
   assert(Ok && "Couldn't assign to global base register!");
   Ok = Ok; // Silence warning when assertions are turned off.
   RegInfo.addLiveIn(Mips::GP);
diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h
index ab8dc59..7919d9a 100644
--- a/lib/Target/Mips/MipsInstrInfo.h
+++ b/lib/Target/Mips/MipsInstrInfo.h
@@ -209,16 +209,19 @@ public:
                             MachineBasicBlock::iterator I,
                             unsigned DestReg, unsigned SrcReg,
                             const TargetRegisterClass *DestRC,
-                            const TargetRegisterClass *SrcRC) const;
+                            const TargetRegisterClass *SrcRC,
+                            DebugLoc DL) const;
   virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MBBI,
                                    unsigned SrcReg, bool isKill, int FrameIndex,
-                                   const TargetRegisterClass *RC) const;
+                                   const TargetRegisterClass *RC,
+                                   const TargetRegisterInfo *TRI) const;
 
   virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MBBI,
                                     unsigned DestReg, int FrameIndex,
-                                    const TargetRegisterClass *RC) const;
+                                    const TargetRegisterClass *RC,
+                                    const TargetRegisterInfo *TRI) const;
 
   virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
                                               MachineInstr* MI,
diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp
index 478da84..5e719af 100644
--- a/lib/Target/Mips/MipsRegisterInfo.cpp
+++ b/lib/Target/Mips/MipsRegisterInfo.cpp
@@ -288,7 +288,7 @@ void MipsRegisterInfo::adjustMipsStackFrame(MachineFunction &MF) const
 
   // Stack locations for FP and RA. If only one of them is used, 
   // the space must be allocated for both, otherwise no space at all.
-  if (hasFP(MF) || MFI->hasCalls()) {
+  if (hasFP(MF) || MFI->adjustsStack()) {
     // FP stack location
     MFI->setObjectOffset(MFI->CreateStackObject(RegSize, RegSize, true), 
                          StackOffset);
@@ -302,7 +302,7 @@ void MipsRegisterInfo::adjustMipsStackFrame(MachineFunction &MF) const
     MipsFI->setRAStackOffset(StackOffset);
     StackOffset += RegSize;
 
-    if (MFI->hasCalls())
+    if (MFI->adjustsStack())
       TopCPUSavedRegOff += RegSize;
   }
 
@@ -407,7 +407,7 @@ emitPrologue(MachineFunction &MF) const
   unsigned StackSize = MFI->getStackSize();
 
   // No need to allocate space on the stack.
-  if (StackSize == 0 && !MFI->hasCalls()) return;
+  if (StackSize == 0 && !MFI->adjustsStack()) return;
 
   int FPOffset = MipsFI->getFPStackOffset();
   int RAOffset = MipsFI->getRAStackOffset();
@@ -425,7 +425,7 @@ emitPrologue(MachineFunction &MF) const
 
   // Save the return address only if the function isnt a leaf one.
   // sw  $ra, stack_loc($sp)
-  if (MFI->hasCalls()) { 
+  if (MFI->adjustsStack()) { 
     BuildMI(MBB, MBBI, dl, TII.get(Mips::SW))
         .addReg(Mips::RA).addImm(RAOffset).addReg(Mips::SP);
   }
@@ -477,7 +477,7 @@ emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const
 
   // Restore the return address only if the function isnt a leaf one.
   // lw  $ra, stack_loc($sp)
-  if (MFI->hasCalls()) { 
+  if (MFI->adjustsStack()) { 
     BuildMI(MBB, MBBI, dl, TII.get(Mips::LW), Mips::RA)
       .addImm(RAOffset).addReg(Mips::SP);
   }
diff --git a/lib/Target/Mips/MipsRegisterInfo.h b/lib/Target/Mips/MipsRegisterInfo.h
index 9fd044c..bc857b8 100644
--- a/lib/Target/Mips/MipsRegisterInfo.h
+++ b/lib/Target/Mips/MipsRegisterInfo.h
@@ -23,15 +23,6 @@ class MipsSubtarget;
 class TargetInstrInfo;
 class Type;
 
-namespace Mips {
-  /// SubregIndex - The index of various sized subregister classes. Note that 
-  /// these indices must be kept in sync with the class indices in the 
-  /// MipsRegisterInfo.td file.
-  enum SubregIndex {
-    SUBREG_FPEVEN = 1, SUBREG_FPODD = 2
-  };
-}
-
 struct MipsRegisterInfo : public MipsGenRegisterInfo {
   const MipsSubtarget &Subtarget;
   const TargetInstrInfo &TII;
diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td
index 00e7723..be78a22 100644
--- a/lib/Target/Mips/MipsRegisterInfo.td
+++ b/lib/Target/Mips/MipsRegisterInfo.td
@@ -34,9 +34,14 @@ class FPR<bits<5> num, string n> : MipsReg<n> {
 }
 
 // Mips 64-bit (aliased) FPU Registers
-class AFPR<bits<5> num, string n, list<Register> subregs> 
+let Namespace = "Mips" in {
+def sub_fpeven : SubRegIndex;
+def sub_fpodd  : SubRegIndex;
+}
+class AFPR<bits<5> num, string n, list<Register> subregs>
   : MipsRegWithSubRegs<n, subregs> {
   let Num = num;
+  let SubRegIndices = [sub_fpeven, sub_fpodd];
 }
 
 //===----------------------------------------------------------------------===//
@@ -141,23 +146,6 @@ let Namespace = "Mips" in {
 }
 
 //===----------------------------------------------------------------------===//
-// Subregister Set Definitions
-//===----------------------------------------------------------------------===//
-
-def mips_subreg_fpeven : PatLeaf<(i32 1)>;
-def mips_subreg_fpodd  : PatLeaf<(i32 2)>;
-
-def : SubRegSet<1, [D0, D1, D2, D3, D4, D5, D6, D7, 
-                    D8, D9, D10, D11, D12, D13, D14, D15],
-                   [F0, F2, F4, F6, F8, F10, F12, F14,
-                    F16, F18, F20, F22, F24, F26, F28, F30]>;
-
-def : SubRegSet<2, [D0, D1, D2, D3, D4, D5, D6, D7, 
-                    D8, D9, D10, D11, D12, D13, D14, D15],
-                   [F1, F3, F5, F7, F9, F11, F13, F15,
-                    F17, F19, F21, F23, F25, F27, F29, F31]>;
-
-//===----------------------------------------------------------------------===//
 // Register Classes
 //===----------------------------------------------------------------------===//
 
@@ -255,7 +243,7 @@ def AFGR64 : RegisterClass<"Mips", [f64], 64,
   // Reserved
   D15]>
 {
-  let SubRegClassList = [FGR32, FGR32];
+  let SubRegClasses = [(FGR32 sub_fpeven, sub_fpodd)];
   let MethodProtos = [{
     iterator allocation_order_end(const MachineFunction &MF) const;
   }];
diff --git a/lib/Target/Mips/MipsSelectionDAGInfo.cpp b/lib/Target/Mips/MipsSelectionDAGInfo.cpp
index 72c149d..e4d70fc 100644
--- a/lib/Target/Mips/MipsSelectionDAGInfo.cpp
+++ b/lib/Target/Mips/MipsSelectionDAGInfo.cpp
@@ -12,10 +12,11 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "mips-selectiondag-info"
-#include "MipsSelectionDAGInfo.h"
+#include "MipsTargetMachine.h"
 using namespace llvm;
 
-MipsSelectionDAGInfo::MipsSelectionDAGInfo() {
+MipsSelectionDAGInfo::MipsSelectionDAGInfo(const MipsTargetMachine &TM)
+  : TargetSelectionDAGInfo(TM) {
 }
 
 MipsSelectionDAGInfo::~MipsSelectionDAGInfo() {
diff --git a/lib/Target/Mips/MipsSelectionDAGInfo.h b/lib/Target/Mips/MipsSelectionDAGInfo.h
index 6eaf0c9..6cafb55 100644
--- a/lib/Target/Mips/MipsSelectionDAGInfo.h
+++ b/lib/Target/Mips/MipsSelectionDAGInfo.h
@@ -18,9 +18,11 @@
 
 namespace llvm {
 
+class MipsTargetMachine;
+
 class MipsSelectionDAGInfo : public TargetSelectionDAGInfo {
 public:
-  MipsSelectionDAGInfo();
+  explicit MipsSelectionDAGInfo(const MipsTargetMachine &TM);
   ~MipsSelectionDAGInfo();
 };
 
diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp
index 4724ff7..ad3eb9e 100644
--- a/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/lib/Target/Mips/MipsTargetMachine.cpp
@@ -42,7 +42,7 @@ MipsTargetMachine(const Target &T, const std::string &TT, const std::string &FS,
                         std::string("E-p:32:32:32-i8:8:32-i16:16:32-n32")), 
   InstrInfo(*this), 
   FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0),
-  TLInfo(*this) {
+  TLInfo(*this), TSInfo(*this) {
   // Abicall enables PIC by default
   if (getRelocationModel() == Reloc::Default) {
     if (Subtarget.isABI_O32())
diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h
index cd671cf..d63976f 100644
--- a/lib/Target/Mips/MipsTargetMachine.h
+++ b/lib/Target/Mips/MipsTargetMachine.h
@@ -17,6 +17,7 @@
 #include "MipsSubtarget.h"
 #include "MipsInstrInfo.h"
 #include "MipsISelLowering.h"
+#include "MipsSelectionDAGInfo.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Target/TargetFrameInfo.h"
@@ -30,6 +31,7 @@ namespace llvm {
     MipsInstrInfo       InstrInfo;
     TargetFrameInfo     FrameInfo;
     MipsTargetLowering  TLInfo;
+    MipsSelectionDAGInfo TSInfo;
   public:
     MipsTargetMachine(const Target &T, const std::string &TT,
                       const std::string &FS, bool isLittle);
@@ -51,6 +53,10 @@ namespace llvm {
       return &TLInfo;
     }
 
+    virtual const MipsSelectionDAGInfo* getSelectionDAGInfo() const {
+      return &TSInfo;
+    }
+
     // Pass Pipeline Configuration
     virtual bool addInstSelector(PassManagerBase &PM,
                                  CodeGenOpt::Level OptLevel);
diff --git a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h
index a424c27..aa2e1f4 100644
--- a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h
+++ b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h
@@ -29,7 +29,7 @@
 #include <string>
 
 namespace llvm {
-  class VISIBILITY_HIDDEN PIC16AsmPrinter : public AsmPrinter {
+  class LLVM_LIBRARY_VISIBILITY PIC16AsmPrinter : public AsmPrinter {
   public:
     explicit PIC16AsmPrinter(TargetMachine &TM, MCStreamer &Streamer);
   private:
diff --git a/lib/Target/PIC16/PIC16DebugInfo.cpp b/lib/Target/PIC16/PIC16DebugInfo.cpp
index 5d86329..6a4d0d6 100644
--- a/lib/Target/PIC16/PIC16DebugInfo.cpp
+++ b/lib/Target/PIC16/PIC16DebugInfo.cpp
@@ -70,7 +70,7 @@ void PIC16DbgInfo::PopulateDerivedTypeInfo (DIType Ty, unsigned short &TypeNo,
   
   // We also need to encode the information about the base type of
   // pointer in TypeNo.
-  DIType BaseType = DIDerivedType(Ty.getNode()).getTypeDerivedFrom();
+  DIType BaseType = DIDerivedType(Ty).getTypeDerivedFrom();
   PopulateDebugInfo(BaseType, TypeNo, HasAux, Aux, TagName);
 }
 
@@ -79,7 +79,7 @@ void PIC16DbgInfo::PopulateArrayTypeInfo (DIType Ty, unsigned short &TypeNo,
                                           bool &HasAux, int Aux[],
                                           std::string &TagName) {
 
-  DICompositeType CTy = DICompositeType(Ty.getNode());
+  DICompositeType CTy = DICompositeType(Ty);
   DIArray Elements = CTy.getTypeArray();
   unsigned short size = 1;
   unsigned short Dimension[4]={0,0,0,0};
@@ -88,7 +88,7 @@ void PIC16DbgInfo::PopulateArrayTypeInfo (DIType Ty, unsigned short &TypeNo,
     if (Element.getTag() == dwarf::DW_TAG_subrange_type) {
       TypeNo = TypeNo << PIC16Dbg::S_DERIVED;
       TypeNo = TypeNo | PIC16Dbg::DT_ARY;
-      DISubrange SubRange = DISubrange(Element.getNode());
+      DISubrange SubRange = DISubrange(Element);
       Dimension[i] = SubRange.getHi() - SubRange.getLo() + 1;
       // Each dimension is represented by 2 bytes starting at byte 9.
       Aux[8+i*2+0] = Dimension[i];
@@ -111,7 +111,7 @@ void PIC16DbgInfo::PopulateStructOrUnionTypeInfo (DIType Ty,
                                                   unsigned short &TypeNo,
                                                   bool &HasAux, int Aux[],
                                                   std::string &TagName) {
-  DICompositeType CTy = DICompositeType(Ty.getNode());
+  DICompositeType CTy = DICompositeType(Ty);
   TypeNo = TypeNo << PIC16Dbg::S_BASIC;
   if (Ty.getTag() == dwarf::DW_TAG_structure_type)
     TypeNo = TypeNo | PIC16Dbg::T_STRUCT;
@@ -124,7 +124,7 @@ void PIC16DbgInfo::PopulateStructOrUnionTypeInfo (DIType Ty,
   // llvm.dbg.composite* global variable. Since we need to revisit 
   // PIC16DebugInfo implementation anyways after the MDNodes based 
   // framework is done, let us continue with the way it is.
-  std::string UniqueSuffix = "." + Ty.getNode()->getNameStr().substr(18);
+  std::string UniqueSuffix = "." + Ty->getNameStr().substr(18);
   TagName += UniqueSuffix;
   unsigned short size = CTy.getSizeInBits()/8;
   // 7th and 8th byte represent size.
@@ -303,7 +303,7 @@ void PIC16DbgInfo::EmitCompositeTypeElements (DICompositeType CTy,
     bool HasAux = false;
     int ElementAux[PIC16Dbg::AuxSize] = { 0 };
     std::string TagName = "";
-    DIDerivedType DITy(Element.getNode());
+    DIDerivedType DITy(Element);
     unsigned short ElementSize = DITy.getSizeInBits()/8;
     // Get mangleddd name for this structure/union  element.
     std::string MangMemName = DITy.getName().str() + SuffixNo;
@@ -336,7 +336,7 @@ void PIC16DbgInfo::EmitCompositeTypeDecls(Module &M) {
         CTy.getTag() == dwarf::DW_TAG_structure_type ) {
       // Get the number after llvm.dbg.composite and make UniqueSuffix from 
       // it.
-      std::string DIVar = CTy.getNode()->getNameStr();
+      std::string DIVar = CTy->getNameStr();
       std::string UniqueSuffix = "." + DIVar.substr(18);
       std::string MangledCTyName = CTy.getName().str() + UniqueSuffix;
       unsigned short size = CTy.getSizeInBits()/8;
diff --git a/lib/Target/PIC16/PIC16ISelDAGToDAG.h b/lib/Target/PIC16/PIC16ISelDAGToDAG.h
index f1fcec5..ecaddd3 100644
--- a/lib/Target/PIC16/PIC16ISelDAGToDAG.h
+++ b/lib/Target/PIC16/PIC16ISelDAGToDAG.h
@@ -26,7 +26,7 @@ using namespace llvm;
 
 namespace {
 
-class VISIBILITY_HIDDEN PIC16DAGToDAGISel : public SelectionDAGISel {
+class LLVM_LIBRARY_VISIBILITY PIC16DAGToDAGISel : public SelectionDAGISel {
 
   /// TM - Keep a reference to PIC16TargetMachine.
   const PIC16TargetMachine &TM;
diff --git a/lib/Target/PIC16/PIC16InstrInfo.cpp b/lib/Target/PIC16/PIC16InstrInfo.cpp
index 9e415e0..793dd9f 100644
--- a/lib/Target/PIC16/PIC16InstrInfo.cpp
+++ b/lib/Target/PIC16/PIC16InstrInfo.cpp
@@ -70,7 +70,8 @@ unsigned PIC16InstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
 void PIC16InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, 
                                          MachineBasicBlock::iterator I,
                                          unsigned SrcReg, bool isKill, int FI,
-                                         const TargetRegisterClass *RC) const {
+                                         const TargetRegisterClass *RC,
+                                         const TargetRegisterInfo *TRI) const {
   const PIC16TargetLowering *PTLI = TM.getTargetLowering();
   DebugLoc DL;
   if (I != MBB.end()) DL = I->getDebugLoc();
@@ -112,7 +113,8 @@ void PIC16InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
 void PIC16InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, 
                                           MachineBasicBlock::iterator I,
                                           unsigned DestReg, int FI,
-                                          const TargetRegisterClass *RC) const {
+                                          const TargetRegisterClass *RC,
+                                          const TargetRegisterInfo *TRI) const {
   const PIC16TargetLowering *PTLI = TM.getTargetLowering();
   DebugLoc DL;
   if (I != MBB.end()) DL = I->getDebugLoc();
@@ -153,9 +155,8 @@ bool PIC16InstrInfo::copyRegToReg (MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I,
                                    unsigned DestReg, unsigned SrcReg,
                                    const TargetRegisterClass *DestRC,
-                                   const TargetRegisterClass *SrcRC) const {
-  DebugLoc DL;
-  if (I != MBB.end()) DL = I->getDebugLoc();
+                                   const TargetRegisterClass *SrcRC,
+                                   DebugLoc DL) const {
 
   if (DestRC == PIC16::FSR16RegisterClass) {
     BuildMI(MBB, I, DL, get(PIC16::copy_fsr), DestReg).addReg(SrcReg);
diff --git a/lib/Target/PIC16/PIC16InstrInfo.h b/lib/Target/PIC16/PIC16InstrInfo.h
index 56f51f0..40a4cb4 100644
--- a/lib/Target/PIC16/PIC16InstrInfo.h
+++ b/lib/Target/PIC16/PIC16InstrInfo.h
@@ -49,17 +49,20 @@ public:
   virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MBBI,
                                    unsigned SrcReg, bool isKill, int FrameIndex,
-                                   const TargetRegisterClass *RC) const;
+                                   const TargetRegisterClass *RC,
+                                   const TargetRegisterInfo *TRI) const;
                                                                                
   virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MBBI,
                                     unsigned DestReg, int FrameIndex,
-                                    const TargetRegisterClass *RC) const;
+                                    const TargetRegisterClass *RC,
+                                    const TargetRegisterInfo *TRI) const;
   virtual bool copyRegToReg(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator MBBI,
                             unsigned DestReg, unsigned SrcReg,
                             const TargetRegisterClass *DestRC,
-                            const TargetRegisterClass *SrcRC) const;
+                            const TargetRegisterClass *SrcRC,
+                            DebugLoc DL) const;
   virtual bool isMoveInstr(const MachineInstr &MI,
                            unsigned &SrcReg, unsigned &DstReg,
                            unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
diff --git a/lib/Target/PIC16/PIC16Section.h b/lib/Target/PIC16/PIC16Section.h
index 9039ca7..5b33b51 100644
--- a/lib/Target/PIC16/PIC16Section.h
+++ b/lib/Target/PIC16/PIC16Section.h
@@ -44,7 +44,8 @@ namespace llvm {
     unsigned Size;
     
     PIC16Section(StringRef name, SectionKind K, StringRef addr, int color)
-      : MCSection(K), Name(name), Address(addr), Color(color), Size(0) {
+      : MCSection(SV_PIC16, K), Name(name), Address(addr),
+        Color(color), Size(0) {
     }
     
   public:
@@ -86,6 +87,11 @@ namespace llvm {
     /// to a section.
     virtual void PrintSwitchToSection(const MCAsmInfo &MAI,
                                       raw_ostream &OS) const;
+
+    static bool classof(const MCSection *S) {
+      return S->getVariant() == SV_PIC16;
+    }
+    static bool classof(const PIC16Section *) { return true; }
   };
 
 } // end namespace llvm
diff --git a/lib/Target/PIC16/PIC16SelectionDAGInfo.cpp b/lib/Target/PIC16/PIC16SelectionDAGInfo.cpp
index 76c6c60..995955a 100644
--- a/lib/Target/PIC16/PIC16SelectionDAGInfo.cpp
+++ b/lib/Target/PIC16/PIC16SelectionDAGInfo.cpp
@@ -12,10 +12,11 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "pic16-selectiondag-info"
-#include "PIC16SelectionDAGInfo.h"
+#include "PIC16TargetMachine.h"
 using namespace llvm;
 
-PIC16SelectionDAGInfo::PIC16SelectionDAGInfo() {
+PIC16SelectionDAGInfo::PIC16SelectionDAGInfo(const PIC16TargetMachine &TM)
+  : TargetSelectionDAGInfo(TM) {
 }
 
 PIC16SelectionDAGInfo::~PIC16SelectionDAGInfo() {
diff --git a/lib/Target/PIC16/PIC16SelectionDAGInfo.h b/lib/Target/PIC16/PIC16SelectionDAGInfo.h
index 112480e5..c67fd8b 100644
--- a/lib/Target/PIC16/PIC16SelectionDAGInfo.h
+++ b/lib/Target/PIC16/PIC16SelectionDAGInfo.h
@@ -18,9 +18,11 @@
 
 namespace llvm {
 
+class PIC16TargetMachine;
+
 class PIC16SelectionDAGInfo : public TargetSelectionDAGInfo {
 public:
-  PIC16SelectionDAGInfo();
+  explicit PIC16SelectionDAGInfo(const PIC16TargetMachine &TM);
   ~PIC16SelectionDAGInfo();
 };
 
diff --git a/lib/Target/PIC16/PIC16TargetMachine.cpp b/lib/Target/PIC16/PIC16TargetMachine.cpp
index e2acb85..82b69be 100644
--- a/lib/Target/PIC16/PIC16TargetMachine.cpp
+++ b/lib/Target/PIC16/PIC16TargetMachine.cpp
@@ -35,7 +35,7 @@ PIC16TargetMachine::PIC16TargetMachine(const Target &T, const std::string &TT,
 : LLVMTargetMachine(T, TT),
   Subtarget(TT, FS, Trad),
   DataLayout("e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8-n8"), 
-  InstrInfo(*this), TLInfo(*this),
+  InstrInfo(*this), TLInfo(*this), TSInfo(*this),
   FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0) { }
 
 
diff --git a/lib/Target/PIC16/PIC16TargetMachine.h b/lib/Target/PIC16/PIC16TargetMachine.h
index 849845a..dae5d31 100644
--- a/lib/Target/PIC16/PIC16TargetMachine.h
+++ b/lib/Target/PIC16/PIC16TargetMachine.h
@@ -17,6 +17,7 @@
 
 #include "PIC16InstrInfo.h"
 #include "PIC16ISelLowering.h"
+#include "PIC16SelectionDAGInfo.h"
 #include "PIC16RegisterInfo.h"
 #include "PIC16Subtarget.h"
 #include "llvm/Target/TargetData.h"
@@ -32,6 +33,7 @@ class PIC16TargetMachine : public LLVMTargetMachine {
   const TargetData      DataLayout;       // Calculates type size & alignment
   PIC16InstrInfo        InstrInfo;
   PIC16TargetLowering   TLInfo;
+  PIC16SelectionDAGInfo TSInfo;
 
   // PIC16 does not have any call stack frame, therefore not having 
   // any PIC16 specific FrameInfo class.
@@ -54,6 +56,10 @@ public:
     return &TLInfo;
   }
 
+  virtual const PIC16SelectionDAGInfo* getSelectionDAGInfo() const {
+    return &TSInfo;
+  }
+
   virtual bool addInstSelector(PassManagerBase &PM,
                                CodeGenOpt::Level OptLevel);
   virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 3d9f8aa..00eebb8 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -712,8 +712,9 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
   if (PPCSubTarget.isGigaProcessor() && OtherCondIdx == -1)
     IntCR = SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR7Reg,
                                            CCReg), 0);
-  else
-    IntCR = SDValue(CurDAG->getMachineNode(PPC::MFCR, dl, MVT::i32, CCReg), 0);
+ else
+    IntCR = SDValue(CurDAG->getMachineNode(PPC::MFCRpseud, dl, MVT::i32,
+                                           CR7Reg, CCReg), 0);
   
   SDValue Ops[] = { IntCR, getI32Imm((32-(3-Idx)) & 31),
                       getI32Imm(31), getI32Imm(31) };
@@ -848,7 +849,8 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
       return CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32,
                                     N->getOperand(0), InFlag);
     else
-      return CurDAG->getMachineNode(PPC::MFCR, dl, MVT::i32, InFlag);
+      return CurDAG->getMachineNode(PPC::MFCRpseud, dl, MVT::i32,
+                                    N->getOperand(0), InFlag);
   }
     
   case ISD::SDIV: {
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 6f11953..10b516a 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -5496,12 +5496,15 @@ bool PPCTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
 
 SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
                                            SelectionDAG &DAG) const {
+  MachineFunction &MF = DAG.getMachineFunction();
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  MFI->setReturnAddressIsTaken(true);
+
   DebugLoc dl = Op.getDebugLoc();
   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
 
   // Make sure the function does not optimize away the store of the RA to
   // the stack.
-  MachineFunction &MF = DAG.getMachineFunction();
   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
   FuncInfo->setLRStoreRequired();
   bool isPPC64 = PPCSubTarget.isPPC64();
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index 1d05f3d..6dcaf1e 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -111,9 +111,10 @@ namespace llvm {
       /// Return with a flag operand, matched by 'blr'
       RET_FLAG,
       
-      /// R32 = MFCR(CRREG, INFLAG) - Represents the MFCR/MFOCRF instructions.
-      /// This copies the bits corresponding to the specified CRREG into the
-      /// resultant GPR.  Bits corresponding to other CR regs are undefined.
+      /// R32 = MFCR(CRREG, INFLAG) - Represents the MFCRpseud/MFOCRF
+      /// instructions.  This copies the bits corresponding to the specified
+      /// CRREG into the resultant GPR.  Bits corresponding to other CR regs
+      /// are undefined.
       MFCR,
 
       /// RESVEC = VCMP(LHS, RHS, OPC) - Represents one of the altivec VCMP*
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index ae1fbd8..1b7a778 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -203,8 +203,6 @@ PPCInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
 void PPCInstrInfo::insertNoop(MachineBasicBlock &MBB, 
                               MachineBasicBlock::iterator MI) const {
   DebugLoc DL;
-  if (MI != MBB.end()) DL = MI->getDebugLoc();
-
   BuildMI(MBB, MI, DL, get(PPC::NOP));
 }
 
@@ -347,15 +345,13 @@ bool PPCInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MI,
                                    unsigned DestReg, unsigned SrcReg,
                                    const TargetRegisterClass *DestRC,
-                                   const TargetRegisterClass *SrcRC) const {
+                                   const TargetRegisterClass *SrcRC,
+                                   DebugLoc DL) const {
   if (DestRC != SrcRC) {
     // Not yet supported!
     return false;
   }
 
-  DebugLoc DL;
-  if (MI != MBB.end()) DL = MI->getDebugLoc();
-
   if (DestRC == PPC::GPRCRegisterClass) {
     BuildMI(MBB, MI, DL, get(PPC::OR), DestReg).addReg(SrcReg).addReg(SrcReg);
   } else if (DestRC == PPC::G8RCRegisterClass) {
@@ -446,7 +442,8 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
       // issue a MFCR to save all of the CRBits.
       unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ? 
                                                            PPC::R2 : PPC::R0;
-      NewMIs.push_back(BuildMI(MF, DL, get(PPC::MFCR), ScratchReg));
+      NewMIs.push_back(BuildMI(MF, DL, get(PPC::MFCRpseud), ScratchReg)
+                               .addReg(SrcReg, getKillRegState(isKill)));
     
       // If the saved register wasn't CR0, shift the bits left so that they are
       // in CR0's slot.
@@ -520,7 +517,8 @@ void
 PPCInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MI,
                                   unsigned SrcReg, bool isKill, int FrameIdx,
-                                  const TargetRegisterClass *RC) const {
+                                  const TargetRegisterClass *RC,
+                                  const TargetRegisterInfo *TRI) const {
   MachineFunction &MF = *MBB.getParent();
   SmallVector<MachineInstr*, 4> NewMIs;
 
@@ -635,7 +633,8 @@ void
 PPCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MI,
                                    unsigned DestReg, int FrameIdx,
-                                   const TargetRegisterClass *RC) const {
+                                   const TargetRegisterClass *RC,
+                                   const TargetRegisterInfo *TRI) const {
   MachineFunction &MF = *MBB.getParent();
   SmallVector<MachineInstr*, 4> NewMIs;
   DebugLoc DL;
diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h
index 9fb6e7d..7a9e11b 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/lib/Target/PowerPC/PPCInstrInfo.h
@@ -114,17 +114,20 @@ public:
                             MachineBasicBlock::iterator MI,
                             unsigned DestReg, unsigned SrcReg,
                             const TargetRegisterClass *DestRC,
-                            const TargetRegisterClass *SrcRC) const;
+                            const TargetRegisterClass *SrcRC,
+                            DebugLoc DL) const;
   
   virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MBBI,
                                    unsigned SrcReg, bool isKill, int FrameIndex,
-                                   const TargetRegisterClass *RC) const;
+                                   const TargetRegisterClass *RC,
+                                   const TargetRegisterInfo *TRI) const;
 
   virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MBBI,
                                     unsigned DestReg, int FrameIndex,
-                                    const TargetRegisterClass *RC) const;
+                                    const TargetRegisterClass *RC,
+                                    const TargetRegisterInfo *TRI) const;
   
   virtual MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF,
                                                  int FrameIx,
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index 532a3ec..63b4581 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -662,7 +662,7 @@ def STWCX : XForm_1<31, 150, (outs), (ins GPRC:$rS, memrr:$dst),
                    [(PPCstcx GPRC:$rS, xoaddr:$dst)]>,
                    isDOT;
 
-let isBarrier = 1, hasCtrlDep = 1 in
+let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in
 def TRAP  : XForm_24<31, 4, (outs), (ins), "trap", LdStGeneral, [(trap)]>;
 
 //===----------------------------------------------------------------------===//
@@ -862,7 +862,6 @@ def STFDX : XForm_28<31, 727, (outs), (ins F8RC:$frS, memrr:$dst),
                      [(store F8RC:$frS, xaddr:$dst)]>;
 }
 
-let isBarrier = 1 in
 def SYNC : XForm_24_sync<31, 598, (outs), (ins),
                         "sync", LdStSync,
                         [(int_ppc_sync)]>;
@@ -1118,14 +1117,17 @@ def MFVRSAVE : XFXForm_1_ext<31, 339, 256, (outs GPRC:$rT), (ins),
 def MTCRF : XFXForm_5<31, 144, (outs), (ins crbitm:$FXM, GPRC:$rS),
                       "mtcrf $FXM, $rS", BrMCRX>,
             PPC970_MicroCode, PPC970_Unit_CRU;
-// FIXME:  this Uses all the CR registers.  Marking it as such is 
-// necessary for DeadMachineInstructionElim to do the right thing.
-// However, marking it also exposes PR 2964, and causes crashes in
-// the Local RA because it doesn't like this sequence:
+
+// This is a pseudo for MFCR, which implicitly uses all 8 of its subregisters;
+// declaring that here gives the local register allocator problems with this:
 //  vreg = MCRF  CR0
 //  MFCR  <kill of whatever preg got assigned to vreg>
-// For now DeadMachineInstructionElim is turned off, so don't do the marking.
-def MFCR  : XFXForm_3<31, 19, (outs GPRC:$rT), (ins), "mfcr $rT", SprMFCR>,
+// while not declaring it breaks DeadMachineInstructionElimination.
+// As it turns out, in all cases where we currently use this,
+// we're only interested in one subregister of it.  Represent this in the
+// instruction to keep the register allocator from becoming confused.
+def MFCRpseud: XFXForm_3<31, 19, (outs GPRC:$rT), (ins crbitm:$FXM),
+                       "mfcr $rT ${:comment} $FXM", SprMFCR>,
             PPC970_MicroCode, PPC970_Unit_CRU;
 def MFOCRF: XFXForm_5a<31, 19, (outs GPRC:$rT), (ins crbitm:$FXM),
                        "mfcr $rT, $FXM", SprMFCR>,
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 5f1e04e..0ff852c 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -689,19 +689,15 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II,
   const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
   const TargetRegisterClass *RC = Subtarget.isPPC64() ? G8RC : GPRC;
   unsigned Reg = findScratchRegister(II, RS, RC, SPAdj);
+  unsigned SrcReg = MI.getOperand(0).getReg();
 
   // We need to store the CR in the low 4-bits of the saved value. First, issue
-  // an MFCR to save all of the CRBits. Add an implicit kill of the CR.
-  if (!MI.getOperand(0).isKill())
-    BuildMI(MBB, II, dl, TII.get(PPC::MFCR), Reg);
-  else
-    // Implicitly kill the CR register.
-    BuildMI(MBB, II, dl, TII.get(PPC::MFCR), Reg)
-      .addReg(MI.getOperand(0).getReg(), RegState::ImplicitKill);
+  // an MFCRpsued to save all of the CRBits and, if needed, kill the SrcReg.
+  BuildMI(MBB, II, dl, TII.get(PPC::MFCRpseud), Reg)
+          .addReg(SrcReg, getKillRegState(MI.getOperand(0).isKill()));
     
   // If the saved register wasn't CR0, shift the bits left so that they are in
   // CR0's slot.
-  unsigned SrcReg = MI.getOperand(0).getReg();
   if (SrcReg != PPC::CR0)
     // rlwinm rA, rA, ShiftBits, 0, 31.
     BuildMI(MBB, II, dl, TII.get(PPC::RLWINM), Reg)
@@ -1009,7 +1005,7 @@ void PPCRegisterInfo::determineFrameLayout(MachineFunction &MF) const {
   if (!DisableRedZone &&
       FrameSize <= 224 &&                          // Fits in red zone.
       !MFI->hasVarSizedObjects() &&                // No dynamic alloca.
-      !MFI->hasCalls() &&                          // No calls.
+      !MFI->adjustsStack() &&                      // No calls.
       (!ALIGN_STACK || MaxAlign <= TargetAlign)) { // No special alignment.
     // No need for frame
     MFI->setStackSize(0);
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td
index 1cb7340..8604f54 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.td
+++ b/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -10,6 +10,15 @@
 //
 //===----------------------------------------------------------------------===//
 
+let Namespace = "PPC" in {
+def sub_lt : SubRegIndex;
+def sub_gt : SubRegIndex;
+def sub_eq : SubRegIndex;
+def sub_un : SubRegIndex;
+def sub_32 : SubRegIndex;
+}
+
+
 class PPCReg<string n> : Register<n> {
   let Namespace = "PPC";
 }
@@ -25,6 +34,7 @@ class GPR<bits<5> num, string n> : PPCReg<n> {
 class GP8<GPR SubReg, string n> : PPCReg<n> {
   field bits<5> Num = SubReg.Num;
   let SubRegs = [SubReg];
+  let SubRegIndices = [sub_32];
 }
 
 // SPR - One of the 32-bit special-purpose registers
@@ -225,6 +235,7 @@ def CR7EQ : CRBIT<30, "30">, DwarfRegNum<[0]>;
 def CR7UN : CRBIT<31, "31">, DwarfRegNum<[0]>;
 
 // Condition registers
+let SubRegIndices = [sub_lt, sub_gt, sub_eq, sub_un] in {
 def CR0 : CR<0, "cr0", [CR0LT, CR0GT, CR0EQ, CR0UN]>, DwarfRegNum<[68]>;
 def CR1 : CR<1, "cr1", [CR1LT, CR1GT, CR1EQ, CR1UN]>, DwarfRegNum<[69]>;
 def CR2 : CR<2, "cr2", [CR2LT, CR2GT, CR2EQ, CR2UN]>, DwarfRegNum<[70]>;
@@ -233,15 +244,7 @@ def CR4 : CR<4, "cr4", [CR4LT, CR4GT, CR4EQ, CR4UN]>, DwarfRegNum<[72]>;
 def CR5 : CR<5, "cr5", [CR5LT, CR5GT, CR5EQ, CR5UN]>, DwarfRegNum<[73]>;
 def CR6 : CR<6, "cr6", [CR6LT, CR6GT, CR6EQ, CR6UN]>, DwarfRegNum<[74]>;
 def CR7 : CR<7, "cr7", [CR7LT, CR7GT, CR7EQ, CR7UN]>, DwarfRegNum<[75]>;
-
-def : SubRegSet<1, [CR0, CR1, CR2, CR3, CR4, CR5, CR6, CR7],
-                   [CR0LT, CR1LT, CR2LT, CR3LT, CR4LT, CR5LT, CR6LT, CR7LT]>;
-def : SubRegSet<2, [CR0, CR1, CR2, CR3, CR4, CR5, CR6, CR7],
-                   [CR0GT, CR1GT, CR2GT, CR3GT, CR4GT, CR5GT, CR6GT, CR7GT]>;
-def : SubRegSet<3, [CR0, CR1, CR2, CR3, CR4, CR5, CR6, CR7],
-                   [CR0EQ, CR1EQ, CR2EQ, CR3EQ, CR4EQ, CR5EQ, CR6EQ, CR7EQ]>;
-def : SubRegSet<4, [CR0, CR1, CR2, CR3, CR4, CR5, CR6, CR7],
-                   [CR0UN, CR1UN, CR2UN, CR3UN, CR4UN, CR5UN, CR6UN, CR7UN]>;
+}
 
 // Link register
 def LR  : SPR<8, "lr">, DwarfRegNum<[65]>;
@@ -372,7 +375,7 @@ def CRBITRC : RegisterClass<"PPC", [i32], 32,
 def CRRC : RegisterClass<"PPC", [i32], 32, [CR0, CR1, CR5, CR6, CR7, CR2, 
   CR3, CR4]>
 {
-  let SubRegClassList = [CRBITRC, CRBITRC, CRBITRC, CRBITRC];
+  let SubRegClasses = [(CRBITRC sub_lt, sub_gt, sub_eq, sub_un)];
 }
 
 def CTRRC : RegisterClass<"PPC", [i32], 32, [CTR]>;
diff --git a/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp b/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp
index c0004a9..d4258b4 100644
--- a/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp
+++ b/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp
@@ -12,10 +12,11 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "powerpc-selectiondag-info"
-#include "PPCSelectionDAGInfo.h"
+#include "PPCTargetMachine.h"
 using namespace llvm;
 
-PPCSelectionDAGInfo::PPCSelectionDAGInfo() {
+PPCSelectionDAGInfo::PPCSelectionDAGInfo(const PPCTargetMachine &TM)
+  : TargetSelectionDAGInfo(TM) {
 }
 
 PPCSelectionDAGInfo::~PPCSelectionDAGInfo() {
diff --git a/lib/Target/PowerPC/PPCSelectionDAGInfo.h b/lib/Target/PowerPC/PPCSelectionDAGInfo.h
index 3ad3418..341b69c 100644
--- a/lib/Target/PowerPC/PPCSelectionDAGInfo.h
+++ b/lib/Target/PowerPC/PPCSelectionDAGInfo.h
@@ -18,9 +18,11 @@
 
 namespace llvm {
 
+class PPCTargetMachine;
+
 class PPCSelectionDAGInfo : public TargetSelectionDAGInfo {
 public:
-  PPCSelectionDAGInfo();
+  explicit PPCSelectionDAGInfo(const PPCTargetMachine &TM);
   ~PPCSelectionDAGInfo();
 };
 
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
index c4a7408..10cd10b 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -44,7 +44,8 @@ PPCTargetMachine::PPCTargetMachine(const Target &T, const std::string &TT,
   : LLVMTargetMachine(T, TT),
     Subtarget(TT, FS, is64Bit),
     DataLayout(Subtarget.getTargetDataString()), InstrInfo(*this),
-    FrameInfo(*this, is64Bit), JITInfo(*this, is64Bit), TLInfo(*this),
+    FrameInfo(*this, is64Bit), JITInfo(*this, is64Bit),
+    TLInfo(*this), TSInfo(*this),
     InstrItins(Subtarget.getInstrItineraryData()) {
 
   if (getRelocationModel() == Reloc::Default) {
diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h
index 35e33a2..626ddbb 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.h
+++ b/lib/Target/PowerPC/PPCTargetMachine.h
@@ -19,6 +19,7 @@
 #include "PPCJITInfo.h"
 #include "PPCInstrInfo.h"
 #include "PPCISelLowering.h"
+#include "PPCSelectionDAGInfo.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetData.h"
 
@@ -35,6 +36,7 @@ class PPCTargetMachine : public LLVMTargetMachine {
   PPCFrameInfo        FrameInfo;
   PPCJITInfo          JITInfo;
   PPCTargetLowering   TLInfo;
+  PPCSelectionDAGInfo TSInfo;
   InstrItineraryData  InstrItins;
 
 public:
@@ -47,6 +49,9 @@ public:
   virtual const PPCTargetLowering *getTargetLowering() const { 
    return &TLInfo;
   }
+  virtual const PPCSelectionDAGInfo* getSelectionDAGInfo() const {
+    return &TSInfo;
+  }
   virtual const PPCRegisterInfo  *getRegisterInfo() const {
     return &InstrInfo.getRegisterInfo();
   }
diff --git a/lib/Target/README.txt b/lib/Target/README.txt
index 144bf5d..7fa73ed 100644
--- a/lib/Target/README.txt
+++ b/lib/Target/README.txt
@@ -1833,6 +1833,21 @@ entry:
 We should use DSE + llvm.lifetime.end to delete dead vtable pointer updates.
 See GCC PR34949
 
+Another interesting case is that something related could be used for variables
+that go const after their ctor has finished.  In these cases, globalopt (which
+can statically run the constructor) could mark the global const (so it gets put
+in the readonly section).  A testcase would be:
+
+#include <complex>
+using namespace std;
+const complex<char> should_be_in_rodata (42,-42);
+complex<char> should_be_in_data (42,-42);
+complex<char> should_be_in_bss;
+
+Where we currently evaluate the ctors but the globals don't become const because
+the optimizer doesn't know they "become const" after the ctor is done.  See
+GCC PR4131 for more examples.
+
 //===---------------------------------------------------------------------===//
 
 In this code:
diff --git a/lib/Target/Sparc/SparcInstrInfo.cpp b/lib/Target/Sparc/SparcInstrInfo.cpp
index e494d7d..8e49eca 100644
--- a/lib/Target/Sparc/SparcInstrInfo.cpp
+++ b/lib/Target/Sparc/SparcInstrInfo.cpp
@@ -122,15 +122,13 @@ bool SparcInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator I,
                                   unsigned DestReg, unsigned SrcReg,
                                   const TargetRegisterClass *DestRC,
-                                  const TargetRegisterClass *SrcRC) const {
+                                  const TargetRegisterClass *SrcRC,
+                                  DebugLoc DL) const {
   if (DestRC != SrcRC) {
     // Not yet supported!
     return false;
   }
 
-  DebugLoc DL;
-  if (I != MBB.end()) DL = I->getDebugLoc();
-
   if (DestRC == SP::IntRegsRegisterClass)
     BuildMI(MBB, I, DL, get(SP::ORrr), DestReg).addReg(SP::G0).addReg(SrcReg);
   else if (DestRC == SP::FPRegsRegisterClass)
@@ -148,7 +146,8 @@ bool SparcInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
 void SparcInstrInfo::
 storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                     unsigned SrcReg, bool isKill, int FI,
-                    const TargetRegisterClass *RC) const {
+                    const TargetRegisterClass *RC,
+                    const TargetRegisterInfo *TRI) const {
   DebugLoc DL;
   if (I != MBB.end()) DL = I->getDebugLoc();
 
@@ -169,7 +168,8 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
 void SparcInstrInfo::
 loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                      unsigned DestReg, int FI,
-                     const TargetRegisterClass *RC) const {
+                     const TargetRegisterClass *RC,
+                     const TargetRegisterInfo *TRI) const {
   DebugLoc DL;
   if (I != MBB.end()) DL = I->getDebugLoc();
 
diff --git a/lib/Target/Sparc/SparcInstrInfo.h b/lib/Target/Sparc/SparcInstrInfo.h
index 345674b..a00ba39 100644
--- a/lib/Target/Sparc/SparcInstrInfo.h
+++ b/lib/Target/Sparc/SparcInstrInfo.h
@@ -74,17 +74,20 @@ public:
                             MachineBasicBlock::iterator I,
                             unsigned DestReg, unsigned SrcReg,
                             const TargetRegisterClass *DestRC,
-                            const TargetRegisterClass *SrcRC) const;
+                            const TargetRegisterClass *SrcRC,
+                            DebugLoc DL) const;
   
   virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MBBI,
                                    unsigned SrcReg, bool isKill, int FrameIndex,
-                                   const TargetRegisterClass *RC) const;
+                                   const TargetRegisterClass *RC,
+                                   const TargetRegisterInfo *TRI) const;
 
   virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MBBI,
                                     unsigned DestReg, int FrameIndex,
-                                    const TargetRegisterClass *RC) const;
+                                    const TargetRegisterClass *RC,
+                                    const TargetRegisterInfo *TRI) const;
   
   virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
                                               MachineInstr* MI,
diff --git a/lib/Target/Sparc/SparcRegisterInfo.td b/lib/Target/Sparc/SparcRegisterInfo.td
index 2b05c19..fede929 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.td
+++ b/lib/Target/Sparc/SparcRegisterInfo.td
@@ -20,6 +20,11 @@ class SparcCtrlReg<string n>: Register<n> {
   let Namespace = "SP";
 }
 
+let Namespace = "SP" in {
+def sub_even : SubRegIndex;
+def sub_odd  : SubRegIndex;
+}
+
 // Registers are identified with 5-bit ID numbers.
 // Ri - 32-bit integer registers
 class Ri<bits<5> num, string n> : SparcReg<n> {
@@ -33,6 +38,7 @@ class Rf<bits<5> num, string n> : SparcReg<n> {
 class Rd<bits<5> num, string n, list<Register> subregs> : SparcReg<n> {
   let Num = num;
   let SubRegs = subregs;
+  let SubRegIndices = [sub_even, sub_odd];
 }
 
 // Control Registers
diff --git a/lib/Target/Sparc/SparcSelectionDAGInfo.cpp b/lib/Target/Sparc/SparcSelectionDAGInfo.cpp
index 4825aa9..190c575 100644
--- a/lib/Target/Sparc/SparcSelectionDAGInfo.cpp
+++ b/lib/Target/Sparc/SparcSelectionDAGInfo.cpp
@@ -12,10 +12,11 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "sparc-selectiondag-info"
-#include "SparcSelectionDAGInfo.h"
+#include "SparcTargetMachine.h"
 using namespace llvm;
 
-SparcSelectionDAGInfo::SparcSelectionDAGInfo() {
+SparcSelectionDAGInfo::SparcSelectionDAGInfo(const SparcTargetMachine &TM)
+  : TargetSelectionDAGInfo(TM) {
 }
 
 SparcSelectionDAGInfo::~SparcSelectionDAGInfo() {
diff --git a/lib/Target/Sparc/SparcSelectionDAGInfo.h b/lib/Target/Sparc/SparcSelectionDAGInfo.h
index bc1b561..dcd4203 100644
--- a/lib/Target/Sparc/SparcSelectionDAGInfo.h
+++ b/lib/Target/Sparc/SparcSelectionDAGInfo.h
@@ -18,9 +18,11 @@
 
 namespace llvm {
 
+class SparcTargetMachine;
+
 class SparcSelectionDAGInfo : public TargetSelectionDAGInfo {
 public:
-  SparcSelectionDAGInfo();
+  explicit SparcSelectionDAGInfo(const SparcTargetMachine &TM);
   ~SparcSelectionDAGInfo();
 };
 
diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp
index a676623..b58d6ba 100644
--- a/lib/Target/Sparc/SparcTargetMachine.cpp
+++ b/lib/Target/Sparc/SparcTargetMachine.cpp
@@ -34,7 +34,7 @@ SparcTargetMachine::SparcTargetMachine(const Target &T, const std::string &TT,
   : LLVMTargetMachine(T, TT),
     Subtarget(TT, FS, is64bit),
     DataLayout(Subtarget.getDataLayout()),
-     TLInfo(*this), InstrInfo(Subtarget),
+     TLInfo(*this), TSInfo(*this), InstrInfo(Subtarget),
     FrameInfo(TargetFrameInfo::StackGrowsDown, 8, 0) {
 }
 
diff --git a/lib/Target/Sparc/SparcTargetMachine.h b/lib/Target/Sparc/SparcTargetMachine.h
index 1367a31..322c82a 100644
--- a/lib/Target/Sparc/SparcTargetMachine.h
+++ b/lib/Target/Sparc/SparcTargetMachine.h
@@ -20,6 +20,7 @@
 #include "SparcInstrInfo.h"
 #include "SparcSubtarget.h"
 #include "SparcISelLowering.h"
+#include "SparcSelectionDAGInfo.h"
 
 namespace llvm {
 
@@ -27,6 +28,7 @@ class SparcTargetMachine : public LLVMTargetMachine {
   SparcSubtarget Subtarget;
   const TargetData DataLayout;       // Calculates type size & alignment
   SparcTargetLowering TLInfo;
+  SparcSelectionDAGInfo TSInfo;
   SparcInstrInfo InstrInfo;
   TargetFrameInfo FrameInfo;
 public:
@@ -42,6 +44,9 @@ public:
   virtual const SparcTargetLowering* getTargetLowering() const {
     return &TLInfo;
   }
+  virtual const SparcSelectionDAGInfo* getSelectionDAGInfo() const {
+    return &TSInfo;
+  }
   virtual const TargetData       *getTargetData() const { return &DataLayout; }
 
   // Pass Pipeline Configuration
diff --git a/lib/Target/SubtargetFeature.cpp b/lib/Target/SubtargetFeature.cpp
index 2094cc9..b35190a 100644
--- a/lib/Target/SubtargetFeature.cpp
+++ b/lib/Target/SubtargetFeature.cpp
@@ -359,29 +359,25 @@ void SubtargetFeatures::dump() const {
   print(dbgs());
 }
 
-/// getDefaultSubtargetFeatures - Return a string listing
-/// the features associated with the target triple.
+/// getDefaultSubtargetFeatures - Return a string listing the features
+/// associated with the target triple.
 ///
 /// FIXME: This is an inelegant way of specifying the features of a
 /// subtarget. It would be better if we could encode this information
 /// into the IR. See <rdar://5972456>.
 ///
-std::string SubtargetFeatures::getDefaultSubtargetFeatures(
-                                               const Triple& Triple) {
-  switch (Triple.getVendor()) {
-  case Triple::Apple:
-    switch (Triple.getArch()) {
-    case Triple::ppc:   // powerpc-apple-*
-      return std::string("altivec");
-    case Triple::ppc64: // powerpc64-apple-*
-      return std::string("64bit,altivec");
-    default:
-      break;
+void SubtargetFeatures::getDefaultSubtargetFeatures(const std::string &CPU,
+                                                    const Triple& Triple) {
+  setCPU(CPU);
+
+  if (Triple.getVendor() == Triple::Apple) {
+    if (Triple.getArch() == Triple::ppc) {
+      // powerpc-apple-*
+      AddFeature("altivec");
+    } else if (Triple.getArch() == Triple::ppc64) {
+      // powerpc64-apple-*
+      AddFeature("64bit");
+      AddFeature("altivec");
     }
-    break;
-  default:
-    break;
-  } 
-
-  return std::string("");
+  }
 }
diff --git a/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp b/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp
index 07cfb2c..90be222 100644
--- a/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp
+++ b/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp
@@ -124,9 +124,9 @@ void SystemZAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
     unsigned Reg = MO.getReg();
     if (Modifier && strncmp(Modifier, "subreg", 6) == 0) {
       if (strncmp(Modifier + 7, "even", 4) == 0)
-        Reg = TM.getRegisterInfo()->getSubReg(Reg, SystemZ::SUBREG_EVEN);
+        Reg = TM.getRegisterInfo()->getSubReg(Reg, SystemZ::subreg_even32);
       else if (strncmp(Modifier + 7, "odd", 3) == 0)
-        Reg = TM.getRegisterInfo()->getSubReg(Reg, SystemZ::SUBREG_ODD);
+        Reg = TM.getRegisterInfo()->getSubReg(Reg, SystemZ::subreg_odd32);
       else
         assert(0 && "Invalid subreg modifier");
     }
diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
index 75d563b..bb2952a 100644
--- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
+++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
@@ -30,11 +30,6 @@
 #include "llvm/Support/raw_ostream.h"
 using namespace llvm;
 
-static const unsigned subreg_even32 = 1;
-static const unsigned subreg_odd32  = 2;
-static const unsigned subreg_even   = 3;
-static const unsigned subreg_odd    = 4;
-
 namespace {
   /// SystemZRRIAddressMode - This corresponds to rriaddr, but uses SDValue's
   /// instead of register numbers for the leaves of the matched tree.
@@ -644,7 +639,7 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) {
     Dividend =
       CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, dl, ResVT,
                              SDValue(Tmp, 0), SDValue(Dividend, 0),
-                             CurDAG->getTargetConstant(subreg_odd, MVT::i32));
+                     CurDAG->getTargetConstant(SystemZ::subreg_odd, MVT::i32));
 
     SDNode *Result;
     SDValue DivVal = SDValue(Dividend, 0);
@@ -660,7 +655,8 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) {
 
     // Copy the division (odd subreg) result, if it is needed.
     if (!SDValue(Node, 0).use_empty()) {
-      unsigned SubRegIdx = (is32Bit ? subreg_odd32 : subreg_odd);
+      unsigned SubRegIdx = (is32Bit ?
+                            SystemZ::subreg_odd32 : SystemZ::subreg_odd);
       SDNode *Div = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                            dl, NVT,
                                            SDValue(Result, 0),
@@ -673,7 +669,8 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) {
 
     // Copy the remainder (even subreg) result, if it is needed.
     if (!SDValue(Node, 1).use_empty()) {
-      unsigned SubRegIdx = (is32Bit ? subreg_even32 : subreg_even);
+      unsigned SubRegIdx = (is32Bit ?
+                            SystemZ::subreg_even32 : SystemZ::subreg_even);
       SDNode *Rem = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                            dl, NVT,
                                            SDValue(Result, 0),
@@ -718,7 +715,8 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) {
     SDNode *Tmp = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
                                          dl, ResVT);
     {
-      unsigned SubRegIdx = (is32Bit ? subreg_odd32 : subreg_odd);
+      unsigned SubRegIdx = (is32Bit ?
+                            SystemZ::subreg_odd32 : SystemZ::subreg_odd);
       Dividend =
         CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, dl, ResVT,
                                SDValue(Tmp, 0), SDValue(Dividend, 0),
@@ -742,7 +740,8 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) {
 
     // Copy the division (odd subreg) result, if it is needed.
     if (!SDValue(Node, 0).use_empty()) {
-      unsigned SubRegIdx = (is32Bit ? subreg_odd32 : subreg_odd);
+      unsigned SubRegIdx = (is32Bit ?
+                            SystemZ::subreg_odd32 : SystemZ::subreg_odd);
       SDNode *Div = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                            dl, NVT,
                                            SDValue(Result, 0),
@@ -754,7 +753,8 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) {
 
     // Copy the remainder (even subreg) result, if it is needed.
     if (!SDValue(Node, 1).use_empty()) {
-      unsigned SubRegIdx = (is32Bit ? subreg_even32 : subreg_even);
+      unsigned SubRegIdx = (is32Bit ?
+                            SystemZ::subreg_even32 : SystemZ::subreg_even);
       SDNode *Rem = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                            dl, NVT,
                                            SDValue(Result, 0),
diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp
index e98f18b..76f2901 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -81,7 +81,7 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm) :
   // LLVM's current latency-oriented scheduler can't handle physreg definitions
   // such as SystemZ has with PSW, so set this to the register-pressure
   // scheduler, because it can.
-  setSchedulingPreference(SchedulingForRegPressure);
+  setSchedulingPreference(Sched::RegPressure);
 
   setBooleanContents(ZeroOrOneBooleanContent);
 
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp
index c92caa4..043686c 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -61,7 +61,8 @@ static inline bool isGVStub(GlobalValue *GV, SystemZTargetMachine &TM) {
 void SystemZInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
                                           MachineBasicBlock::iterator MI,
                                     unsigned SrcReg, bool isKill, int FrameIdx,
-                                    const TargetRegisterClass *RC) const {
+                                           const TargetRegisterClass *RC,
+                                           const TargetRegisterInfo *TRI) const {
   DebugLoc DL;
   if (MI != MBB.end()) DL = MI->getDebugLoc();
 
@@ -90,7 +91,8 @@ void SystemZInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
 void SystemZInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
                                            MachineBasicBlock::iterator MI,
                                            unsigned DestReg, int FrameIdx,
-                                           const TargetRegisterClass *RC) const{
+                                            const TargetRegisterClass *RC,
+                                            const TargetRegisterInfo *TRI) const{
   DebugLoc DL;
   if (MI != MBB.end()) DL = MI->getDebugLoc();
 
@@ -119,9 +121,8 @@ bool SystemZInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator I,
                                     unsigned DestReg, unsigned SrcReg,
                                     const TargetRegisterClass *DestRC,
-                                    const TargetRegisterClass *SrcRC) const {
-  DebugLoc DL;
-  if (I != MBB.end()) DL = I->getDebugLoc();
+                                    const TargetRegisterClass *SrcRC,
+                                    DebugLoc DL) const {
 
   // Determine if DstRC and SrcRC have a common superclass.
   const TargetRegisterClass *CommonRC = DestRC;
@@ -269,7 +270,8 @@ unsigned SystemZInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
 bool
 SystemZInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
                                            MachineBasicBlock::iterator MI,
-                                const std::vector<CalleeSavedInfo> &CSI) const {
+                                        const std::vector<CalleeSavedInfo> &CSI,
+                                          const TargetRegisterInfo *TRI) const {
   if (CSI.empty())
     return false;
 
@@ -333,7 +335,8 @@ SystemZInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
     const TargetRegisterClass *RegClass = CSI[i].getRegClass();
     if (RegClass == &SystemZ::FP64RegClass) {
       MBB.addLiveIn(Reg);
-      storeRegToStackSlot(MBB, MI, Reg, true, CSI[i].getFrameIdx(), RegClass);
+      storeRegToStackSlot(MBB, MI, Reg, true, CSI[i].getFrameIdx(), RegClass,
+                          &RI);
     }
   }
 
@@ -343,7 +346,8 @@ SystemZInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
 bool
 SystemZInstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
                                              MachineBasicBlock::iterator MI,
-                                const std::vector<CalleeSavedInfo> &CSI) const {
+                                        const std::vector<CalleeSavedInfo> &CSI,
+                                          const TargetRegisterInfo *TRI) const {
   if (CSI.empty())
     return false;
 
@@ -359,7 +363,7 @@ SystemZInstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
     unsigned Reg = CSI[i].getReg();
     const TargetRegisterClass *RegClass = CSI[i].getRegClass();
     if (RegClass == &SystemZ::FP64RegClass)
-      loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RegClass);
+      loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RegClass, &RI);
   }
 
   // Restore GP registers
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.h b/lib/Target/SystemZ/SystemZInstrInfo.h
index ef3b39e..a753f14 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.h
+++ b/lib/Target/SystemZ/SystemZInstrInfo.h
@@ -63,7 +63,8 @@ public:
   bool copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                     unsigned DestReg, unsigned SrcReg,
                     const TargetRegisterClass *DestRC,
-                    const TargetRegisterClass *SrcRC) const;
+                    const TargetRegisterClass *SrcRC,
+                    DebugLoc DL) const;
 
   bool isMoveInstr(const MachineInstr& MI,
                    unsigned &SrcReg, unsigned &DstReg,
@@ -75,18 +76,22 @@ public:
                                    MachineBasicBlock::iterator MI,
                                    unsigned SrcReg, bool isKill,
                                    int FrameIndex,
-                                   const TargetRegisterClass *RC) const;
+                                   const TargetRegisterClass *RC,
+                                   const TargetRegisterInfo *TRI) const;
   virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MI,
                                     unsigned DestReg, int FrameIdx,
-                                    const TargetRegisterClass *RC) const;
+                                    const TargetRegisterClass *RC,
+                                    const TargetRegisterInfo *TRI) const;
 
   virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator MI,
-                                 const std::vector<CalleeSavedInfo> &CSI) const;
+                                        const std::vector<CalleeSavedInfo> &CSI,
+                                         const TargetRegisterInfo *TRI) const;
   virtual bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
                                            MachineBasicBlock::iterator MI,
-                                 const std::vector<CalleeSavedInfo> &CSI) const;
+                                        const std::vector<CalleeSavedInfo> &CSI,
+                                           const TargetRegisterInfo *TRI) const;
 
   bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
   virtual bool isUnpredicatedTerminator(const MachineInstr *MI) const;
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.h b/lib/Target/SystemZ/SystemZRegisterInfo.h
index 99e396a..42aa5dd 100644
--- a/lib/Target/SystemZ/SystemZRegisterInfo.h
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.h
@@ -1,4 +1,4 @@
-//===- SystemZRegisterInfo.h - SystemZ Register Information Impl ----*- C++ -*-===//
+//===-- SystemZRegisterInfo.h - SystemZ Register Information ----*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -19,15 +19,6 @@
 
 namespace llvm {
 
-namespace SystemZ {
-  /// SubregIndex - The index of various sized subregister classes. Note that
-  /// these indices must be kept in sync with the class indices in the
-  /// SystemZRegisterInfo.td file.
-  enum SubregIndex {
-    SUBREG_32BIT = 1, SUBREG_EVEN = 1, SUBREG_ODD = 2
-  };
-}
-
 class SystemZSubtarget;
 class SystemZInstrInfo;
 class Type;
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.td b/lib/Target/SystemZ/SystemZRegisterInfo.td
index 8795847..b561744 100644
--- a/lib/Target/SystemZ/SystemZRegisterInfo.td
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.td
@@ -53,6 +53,14 @@ class FPRL<bits<4> num, string n, list<Register> subregs>
   field bits<4> Num = num;
 }
 
+let Namespace = "SystemZ" in {
+def subreg_32bit  : SubRegIndex;
+def subreg_even32 : SubRegIndex;
+def subreg_odd32  : SubRegIndex;
+def subreg_even   : SubRegIndex;
+def subreg_odd    : SubRegIndex;
+}
+
 // General-purpose registers
 def R0W  : GPR32< 0,  "r0">, DwarfRegNum<[0]>;
 def R1W  : GPR32< 1,  "r1">, DwarfRegNum<[1]>;
@@ -71,6 +79,7 @@ def R13W : GPR32<13, "r13">, DwarfRegNum<[13]>;
 def R14W : GPR32<14, "r14">, DwarfRegNum<[14]>;
 def R15W : GPR32<15, "r15">, DwarfRegNum<[15]>;
 
+let SubRegIndices = [subreg_32bit] in {
 def R0D  : GPR64< 0,  "r0", [R0W]>,  DwarfRegNum<[0]>;
 def R1D  : GPR64< 1,  "r1", [R1W]>,  DwarfRegNum<[1]>;
 def R2D  : GPR64< 2,  "r2", [R2W]>,  DwarfRegNum<[2]>;
@@ -87,8 +96,10 @@ def R12D : GPR64<12, "r12", [R12W]>, DwarfRegNum<[12]>;
 def R13D : GPR64<13, "r13", [R13W]>, DwarfRegNum<[13]>;
 def R14D : GPR64<14, "r14", [R14W]>, DwarfRegNum<[14]>;
 def R15D : GPR64<15, "r15", [R15W]>, DwarfRegNum<[15]>;
+}
 
 // Register pairs
+let SubRegIndices = [subreg_even32, subreg_odd32] in {
 def R0P  : GPR64< 0,  "r0", [R0W,  R1W],  [R0D,  R1D]>,  DwarfRegNum<[0]>;
 def R2P  : GPR64< 2,  "r2", [R2W,  R3W],  [R2D,  R3D]>,  DwarfRegNum<[2]>;
 def R4P  : GPR64< 4,  "r4", [R4W,  R5W],  [R4D,  R5D]>,  DwarfRegNum<[4]>;
@@ -97,7 +108,11 @@ def R8P  : GPR64< 8,  "r8", [R8W,  R9W],  [R8D,  R9D]>,  DwarfRegNum<[8]>;
 def R10P : GPR64<10, "r10", [R10W, R11W], [R10D, R11D]>, DwarfRegNum<[10]>;
 def R12P : GPR64<12, "r12", [R12W, R13W], [R12D, R13D]>, DwarfRegNum<[12]>;
 def R14P : GPR64<14, "r14", [R14W, R15W], [R14D, R15D]>, DwarfRegNum<[14]>;
+}
 
+let SubRegIndices = [subreg_even, subreg_odd],
+ CompositeIndices = [(subreg_even32 subreg_even, subreg_32bit),
+                     (subreg_odd32  subreg_odd,  subreg_32bit)] in {
 def R0Q  : GPR128< 0,  "r0", [R0D,  R1D],  [R0P]>,  DwarfRegNum<[0]>;
 def R2Q  : GPR128< 2,  "r2", [R2D,  R3D],  [R2P]>,  DwarfRegNum<[2]>;
 def R4Q  : GPR128< 4,  "r4", [R4D,  R5D],  [R4P]>,  DwarfRegNum<[4]>;
@@ -106,6 +121,7 @@ def R8Q  : GPR128< 8,  "r8", [R8D,  R9D],  [R8P]>,  DwarfRegNum<[8]>;
 def R10Q : GPR128<10, "r10", [R10D, R11D], [R10P]>, DwarfRegNum<[10]>;
 def R12Q : GPR128<12, "r12", [R12D, R13D], [R12P]>, DwarfRegNum<[12]>;
 def R14Q : GPR128<14, "r14", [R14D, R15D], [R14P]>, DwarfRegNum<[14]>;
+}
 
 // Floating-point registers
 def F0S  : FPRS< 0,  "f0">, DwarfRegNum<[16]>;
@@ -125,6 +141,7 @@ def F13S : FPRS<13, "f13">, DwarfRegNum<[29]>;
 def F14S : FPRS<14, "f14">, DwarfRegNum<[30]>;
 def F15S : FPRS<15, "f15">, DwarfRegNum<[31]>;
 
+let SubRegIndices = [subreg_32bit] in {
 def F0L  : FPRL< 0,  "f0", [F0S]>,  DwarfRegNum<[16]>;
 def F1L  : FPRL< 1,  "f1", [F1S]>,  DwarfRegNum<[17]>;
 def F2L  : FPRL< 2,  "f2", [F2S]>,  DwarfRegNum<[18]>;
@@ -141,39 +158,11 @@ def F12L : FPRL<12, "f12", [F12S]>, DwarfRegNum<[28]>;
 def F13L : FPRL<13, "f13", [F13S]>, DwarfRegNum<[29]>;
 def F14L : FPRL<14, "f14", [F14S]>, DwarfRegNum<[30]>;
 def F15L : FPRL<15, "f15", [F15S]>, DwarfRegNum<[31]>;
+}
 
 // Status register
 def PSW : SystemZReg<"psw">;
 
-def subreg_32bit  : PatLeaf<(i32 1)>;
-def subreg_even32 : PatLeaf<(i32 1)>;
-def subreg_odd32  : PatLeaf<(i32 2)>;
-def subreg_even   : PatLeaf<(i32 3)>;
-def subreg_odd    : PatLeaf<(i32 4)>;
-
-def : SubRegSet<1, [R0D, R1D,  R2D,  R3D,  R4D,  R5D,  R6D,  R7D,
-                    R8D, R9D, R10D, R11D, R12D, R13D, R14D, R15D],
-                   [R0W, R1W,  R2W,  R3W,  R4W,  R5W,  R6W,  R7W,
-                    R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W]>;
-
-def : SubRegSet<3, [R0Q, R2Q, R4Q, R6Q, R8Q, R10Q, R12Q, R14Q],
-                   [R0D, R2D, R4D, R6D, R8D, R10D, R12D, R14D]>;
-
-def : SubRegSet<4, [R0Q, R2Q, R4Q, R6Q, R8Q, R10Q, R12Q, R14Q],
-                   [R1D, R3D, R5D, R7D, R9D, R11D, R13D, R15D]>;
-
-def : SubRegSet<1, [R0P, R2P, R4P, R6P, R8P, R10P, R12P, R14P],
-                   [R0W, R2W, R4W, R6W, R8W, R10W, R12W, R14W]>;
-
-def : SubRegSet<2, [R0P, R2P, R4P, R6P, R8P, R10P, R12P, R14P],
-                   [R1W, R3W, R5W, R7W, R9W, R11W, R13W, R15W]>;
-
-def : SubRegSet<1, [R0Q, R2Q, R4Q, R6Q, R8Q, R10Q, R12Q, R14Q],
-                   [R0W, R2W, R4W, R6W, R8W, R10W, R12W, R14W]>;
-
-def : SubRegSet<2, [R0Q, R2Q, R4Q, R6Q, R8Q, R10Q, R12Q, R14Q],
-                   [R1W, R3W, R5W, R7W, R9W, R11W, R13W, R15W]>;
-
 /// Register classes
 def GR32 : RegisterClass<"SystemZ", [i32], 32,
    // Volatile registers
@@ -276,7 +265,7 @@ def GR64 : RegisterClass<"SystemZ", [i64], 64,
    // Volatile, but not allocable
    R14D, R15D]>
 {
-  let SubRegClassList = [GR32];
+  let SubRegClasses = [(GR32 subreg_32bit)];
   let MethodProtos = [{
     iterator allocation_order_begin(const MachineFunction &MF) const;
     iterator allocation_order_end(const MachineFunction &MF) const;
@@ -323,7 +312,7 @@ def ADDR64 : RegisterClass<"SystemZ", [i64], 64,
    // Volatile, but not allocable
    R14D, R15D]>
 {
-  let SubRegClassList = [ADDR32];
+  let SubRegClasses = [(ADDR32 subreg_32bit)];
   let MethodProtos = [{
     iterator allocation_order_begin(const MachineFunction &MF) const;
     iterator allocation_order_end(const MachineFunction &MF) const;
@@ -366,7 +355,7 @@ def ADDR64 : RegisterClass<"SystemZ", [i64], 64,
 def GR64P : RegisterClass<"SystemZ", [v2i32], 64,
   [R0P, R2P, R4P, R6P, R8P, R10P, R12P, R14P]>
 {
-  let SubRegClassList = [GR32, GR32];
+  let SubRegClasses = [(GR32 subreg_even32, subreg_odd32)];
   let MethodProtos = [{
     iterator allocation_order_begin(const MachineFunction &MF) const;
     iterator allocation_order_end(const MachineFunction &MF) const;
@@ -402,7 +391,8 @@ def GR64P : RegisterClass<"SystemZ", [v2i32], 64,
 def GR128 : RegisterClass<"SystemZ", [v2i64], 128,
   [R0Q, R2Q, R4Q, R6Q, R8Q, R10Q, R12Q, R14Q]>
 {
-  let SubRegClassList = [GR32, GR32, GR64, GR64];
+  let SubRegClasses = [(GR32 subreg_even32, subreg_odd32),
+                         (GR64 subreg_even, subreg_odd)];
   let MethodProtos = [{
     iterator allocation_order_begin(const MachineFunction &MF) const;
     iterator allocation_order_end(const MachineFunction &MF) const;
@@ -462,7 +452,7 @@ def FP32 : RegisterClass<"SystemZ", [f32], 32,
 def FP64 : RegisterClass<"SystemZ", [f64], 64,
  [F0L, F1L,  F2L,  F3L,  F4L,  F5L,  F6L,  F7L, 
   F8L, F9L, F10L, F11L, F12L, F13L, F14L, F15L]> {
-  let SubRegClassList = [FP32];
+  let SubRegClasses = [(FP32 subreg_32bit)];
   let MethodProtos = [{
     iterator allocation_order_begin(const MachineFunction &MF) const;
     iterator allocation_order_end(const MachineFunction &MF) const;
diff --git a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
index 87c831b..3eabcd2 100644
--- a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
+++ b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
@@ -12,10 +12,11 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "systemz-selectiondag-info"
-#include "SystemZSelectionDAGInfo.h"
+#include "SystemZTargetMachine.h"
 using namespace llvm;
 
-SystemZSelectionDAGInfo::SystemZSelectionDAGInfo() {
+SystemZSelectionDAGInfo::SystemZSelectionDAGInfo(const SystemZTargetMachine &TM)
+  : TargetSelectionDAGInfo(TM) {
 }
 
 SystemZSelectionDAGInfo::~SystemZSelectionDAGInfo() {
diff --git a/lib/Target/SystemZ/SystemZSelectionDAGInfo.h b/lib/Target/SystemZ/SystemZSelectionDAGInfo.h
index 5292de9..1450401 100644
--- a/lib/Target/SystemZ/SystemZSelectionDAGInfo.h
+++ b/lib/Target/SystemZ/SystemZSelectionDAGInfo.h
@@ -18,9 +18,11 @@
 
 namespace llvm {
 
+class SystemZTargetMachine;
+
 class SystemZSelectionDAGInfo : public TargetSelectionDAGInfo {
 public:
-  SystemZSelectionDAGInfo();
+  explicit SystemZSelectionDAGInfo(const SystemZTargetMachine &TM);
   ~SystemZSelectionDAGInfo();
 };
 
diff --git a/lib/Target/SystemZ/SystemZTargetMachine.cpp b/lib/Target/SystemZ/SystemZTargetMachine.cpp
index dfa26a1..f45827b 100644
--- a/lib/Target/SystemZ/SystemZTargetMachine.cpp
+++ b/lib/Target/SystemZ/SystemZTargetMachine.cpp
@@ -29,7 +29,7 @@ SystemZTargetMachine::SystemZTargetMachine(const Target &T,
     Subtarget(TT, FS),
     DataLayout("E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32"
                "-f64:64:64-f128:128:128-a0:16:16-n32:64"),
-    InstrInfo(*this), TLInfo(*this),
+    InstrInfo(*this), TLInfo(*this), TSInfo(*this),
     FrameInfo(TargetFrameInfo::StackGrowsDown, 8, -160) {
 
   if (getRelocationModel() == Reloc::Default)
diff --git a/lib/Target/SystemZ/SystemZTargetMachine.h b/lib/Target/SystemZ/SystemZTargetMachine.h
index d3357cc..6af829b 100644
--- a/lib/Target/SystemZ/SystemZTargetMachine.h
+++ b/lib/Target/SystemZ/SystemZTargetMachine.h
@@ -17,6 +17,7 @@
 
 #include "SystemZInstrInfo.h"
 #include "SystemZISelLowering.h"
+#include "SystemZSelectionDAGInfo.h"
 #include "SystemZRegisterInfo.h"
 #include "SystemZSubtarget.h"
 #include "llvm/Target/TargetData.h"
@@ -32,6 +33,7 @@ class SystemZTargetMachine : public LLVMTargetMachine {
   const TargetData        DataLayout;       // Calculates type size & alignment
   SystemZInstrInfo        InstrInfo;
   SystemZTargetLowering   TLInfo;
+  SystemZSelectionDAGInfo TSInfo;
 
   // SystemZ does not have any call stack frame, therefore not having
   // any SystemZ specific FrameInfo class.
@@ -53,6 +55,10 @@ public:
     return &TLInfo;
   }
 
+  virtual const SystemZSelectionDAGInfo* getSelectionDAGInfo() const {
+    return &TSInfo;
+  }
+
   virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
 }; // SystemZTargetMachine.
 
diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp
index ac67c91..df52368 100644
--- a/lib/Target/TargetMachine.cpp
+++ b/lib/Target/TargetMachine.cpp
@@ -212,7 +212,8 @@ FunctionSections("ffunction-sections",
 //
 
 TargetMachine::TargetMachine(const Target &T) 
-  : TheTarget(T), AsmInfo(0) {
+  : TheTarget(T), AsmInfo(0),
+    MCRelaxAll(false) {
   // Typically it will be subtargets that will adjust FloatABIType from Default
   // to Soft or Hard.
   if (UseSoftFloat)
@@ -273,13 +274,14 @@ namespace llvm {
   /// DisableFramePointerElim - This returns true if frame pointer elimination
   /// optimization should be disabled for the given machine function.
   bool DisableFramePointerElim(const MachineFunction &MF) {
-    if (NoFramePointerElim)
-      return true;
-    if (NoFramePointerElimNonLeaf) {
+    // Check to see if we should eliminate non-leaf frame pointers and then
+    // check to see if we should eliminate all frame pointers.
+    if (NoFramePointerElimNonLeaf && !NoFramePointerElim) {
       const MachineFrameInfo *MFI = MF.getFrameInfo();
       return MFI->hasCalls();
     }
-    return false;
+
+    return NoFramePointerElim;
   }
 
   /// LessPreciseFPMAD - This flag return true when -enable-fp-mad option
diff --git a/lib/Target/TargetRegisterInfo.cpp b/lib/Target/TargetRegisterInfo.cpp
index 52983ff..dcc5f61 100644
--- a/lib/Target/TargetRegisterInfo.cpp
+++ b/lib/Target/TargetRegisterInfo.cpp
@@ -22,14 +22,14 @@ using namespace llvm;
 
 TargetRegisterInfo::TargetRegisterInfo(const TargetRegisterDesc *D, unsigned NR,
                              regclass_iterator RCB, regclass_iterator RCE,
+                             const char *const *subregindexnames,
                              int CFSO, int CFDO,
                              const unsigned* subregs, const unsigned subregsize,
-                         const unsigned* superregs, const unsigned superregsize,
                          const unsigned* aliases, const unsigned aliasessize)
   : SubregHash(subregs), SubregHashSize(subregsize),
-    SuperregHash(superregs), SuperregHashSize(superregsize),
     AliasesHash(aliases), AliasesHashSize(aliasessize),
-    Desc(D), NumRegs(NR), RegClassBegin(RCB), RegClassEnd(RCE) {
+    Desc(D), SubRegIndexNames(subregindexnames), NumRegs(NR),
+    RegClassBegin(RCB), RegClassEnd(RCE) {
   assert(NumRegs < FirstVirtualRegister &&
          "Target has too many physical registers!");
 
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index 6b403c1..40a6a7b 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -186,20 +186,73 @@ struct X86Operand : public MCParsedAsmOperand {
 
   bool isImm() const { return Kind == Immediate; }
   
-  bool isImmSExt8() const { 
-    // Accept immediates which fit in 8 bits when sign extended, and
-    // non-absolute immediates.
+  bool isImmSExti16i8() const {
     if (!isImm())
       return false;
 
-    if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm())) {
-      int64_t Value = CE->getValue();
-      return Value == (int64_t) (int8_t) Value;
-    }
+    // If this isn't a constant expr, just assume it fits and let relaxation
+    // handle it.
+    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+    if (!CE)
+      return true;
 
-    return true;
+    // Otherwise, check the value is in a range that makes sense for this
+    // extension.
+    uint64_t Value = CE->getValue();
+    return ((                                  Value <= 0x000000000000007FULL)||
+            (0x000000000000FF80ULL <= Value && Value <= 0x000000000000FFFFULL)||
+            (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
   }
-  
+  bool isImmSExti32i8() const {
+    if (!isImm())
+      return false;
+
+    // If this isn't a constant expr, just assume it fits and let relaxation
+    // handle it.
+    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+    if (!CE)
+      return true;
+
+    // Otherwise, check the value is in a range that makes sense for this
+    // extension.
+    uint64_t Value = CE->getValue();
+    return ((                                  Value <= 0x000000000000007FULL)||
+            (0x00000000FFFFFF80ULL <= Value && Value <= 0x00000000FFFFFFFFULL)||
+            (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
+  }
+  bool isImmSExti64i8() const {
+    if (!isImm())
+      return false;
+
+    // If this isn't a constant expr, just assume it fits and let relaxation
+    // handle it.
+    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+    if (!CE)
+      return true;
+
+    // Otherwise, check the value is in a range that makes sense for this
+    // extension.
+    uint64_t Value = CE->getValue();
+    return ((                                  Value <= 0x000000000000007FULL)||
+            (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
+  }
+  bool isImmSExti64i32() const {
+    if (!isImm())
+      return false;
+
+    // If this isn't a constant expr, just assume it fits and let relaxation
+    // handle it.
+    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+    if (!CE)
+      return true;
+
+    // Otherwise, check the value is in a range that makes sense for this
+    // extension.
+    uint64_t Value = CE->getValue();
+    return ((                                  Value <= 0x000000007FFFFFFFULL)||
+            (0xFFFFFFFF80000000ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
+  }
+
   bool isMem() const { return Kind == Memory; }
 
   bool isAbsMem() const {
@@ -231,12 +284,6 @@ struct X86Operand : public MCParsedAsmOperand {
     addExpr(Inst, getImm());
   }
 
-  void addImmSExt8Operands(MCInst &Inst, unsigned N) const {
-    // FIXME: Support user customization of the render method.
-    assert(N == 1 && "Invalid number of operands!");
-    addExpr(Inst, getImm());
-  }
-
   void addMemOperands(MCInst &Inst, unsigned N) const {
     assert((N == 5) && "Invalid number of operands!");
     Inst.addOperand(MCOperand::CreateReg(getMemBaseReg()));
@@ -535,6 +582,21 @@ X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
 bool X86ATTAsmParser::
 ParseInstruction(const StringRef &Name, SMLoc NameLoc,
                  SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+  // The various flavors of pushf and popf use Requires<In32BitMode> and
+  // Requires<In64BitMode>, but the assembler doesn't yet implement that.
+  // For now, just do a manual check to prevent silent misencoding.
+  if (Is64Bit) {
+    if (Name == "popfl")
+      return Error(NameLoc, "popfl cannot be encoded in 64-bit mode");
+    else if (Name == "pushfl")
+      return Error(NameLoc, "pushfl cannot be encoded in 64-bit mode");
+  } else {
+    if (Name == "popfq")
+      return Error(NameLoc, "popfq cannot be encoded in 32-bit mode");
+    else if (Name == "pushfq")
+      return Error(NameLoc, "pushfq cannot be encoded in 32-bit mode");
+  }
+
   // FIXME: Hack to recognize "sal..." and "rep..." for now. We need a way to
   // represent alternative syntaxes in the .td file, without requiring
   // instruction duplication.
@@ -547,9 +609,66 @@ ParseInstruction(const StringRef &Name, SMLoc NameLoc,
     .Case("repe", "rep")
     .Case("repz", "rep")
     .Case("repnz", "repne")
+    .Case("pushf", Is64Bit ? "pushfq" : "pushfl")
+    .Case("popf",  Is64Bit ? "popfq"  : "popfl")
+    .Case("retl", Is64Bit ? "retl" : "ret")
+    .Case("retq", Is64Bit ? "ret" : "retq")
+    .Case("setz", "sete")
+    .Case("setnz", "setne")
+    .Case("jz", "je")
+    .Case("jnz", "jne")
+    .Case("cmovcl", "cmovbl")
+    .Case("cmovcl", "cmovbl")
+    .Case("cmovnal", "cmovbel")
+    .Case("cmovnbl", "cmovael")
+    .Case("cmovnbel", "cmoval")
+    .Case("cmovncl", "cmovael")
+    .Case("cmovngl", "cmovlel")
+    .Case("cmovnl", "cmovgel")
+    .Case("cmovngl", "cmovlel")
+    .Case("cmovngel", "cmovll")
+    .Case("cmovnll", "cmovgel")
+    .Case("cmovnlel", "cmovgl")
+    .Case("cmovnzl", "cmovnel")
+    .Case("cmovzl", "cmovel")
     .Default(Name);
+
+  // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
+  const MCExpr *ExtraImmOp = 0;
+  if (PatchedName.startswith("cmp") &&
+      (PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
+       PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
+    unsigned SSEComparisonCode = StringSwitch<unsigned>(
+      PatchedName.slice(3, PatchedName.size() - 2))
+      .Case("eq", 0)
+      .Case("lt", 1)
+      .Case("le", 2)
+      .Case("unord", 3)
+      .Case("neq", 4)
+      .Case("nlt", 5)
+      .Case("nle", 6)
+      .Case("ord", 7)
+      .Default(~0U);
+    if (SSEComparisonCode != ~0U) {
+      ExtraImmOp = MCConstantExpr::Create(SSEComparisonCode,
+                                          getParser().getContext());
+      if (PatchedName.endswith("ss")) {
+        PatchedName = "cmpss";
+      } else if (PatchedName.endswith("sd")) {
+        PatchedName = "cmpsd";
+      } else if (PatchedName.endswith("ps")) {
+        PatchedName = "cmpps";
+      } else {
+        assert(PatchedName.endswith("pd") && "Unexpected mnemonic!");
+        PatchedName = "cmppd";
+      }
+    }
+  }
   Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
 
+  if (ExtraImmOp)
+    Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
+
   if (getLexer().isNot(AsmToken::EndOfStatement)) {
 
     // Parse '*' modifier.
@@ -564,7 +683,7 @@ ParseInstruction(const StringRef &Name, SMLoc NameLoc,
       Operands.push_back(Op);
     else
       return true;
-    
+
     while (getLexer().is(AsmToken::Comma)) {
       Parser.Lex();  // Eat the comma.
 
@@ -587,6 +706,17 @@ ParseInstruction(const StringRef &Name, SMLoc NameLoc,
     Operands.erase(Operands.begin() + 1);
   }
 
+  // FIXME: Hack to handle "f{mul*,add*,sub*,div*} $op, st(0)" the same as
+  // "f{mul*,add*,sub*,div*} $op"
+  if ((Name.startswith("fmul") || Name.startswith("fadd") ||
+       Name.startswith("fsub") || Name.startswith("fdiv")) &&
+      Operands.size() == 3 &&
+      static_cast<X86Operand*>(Operands[2])->isReg() &&
+      static_cast<X86Operand*>(Operands[2])->getReg() == X86::ST0) {
+    delete Operands[2];
+    Operands.erase(Operands.begin() + 2);
+  }
+
   return false;
 }
 
@@ -622,6 +752,31 @@ bool X86ATTAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
   return false;
 }
 
+/// LowerMOffset - Lower an 'moffset' form of an instruction, which just has a
+/// imm operand, to having "rm" or "mr" operands with the offset in the disp
+/// field.
+static void LowerMOffset(MCInst &Inst, unsigned Opc, unsigned RegNo,
+                         bool isMR) {
+  MCOperand Disp = Inst.getOperand(0);
+
+  // Start over with an empty instruction.
+  Inst = MCInst();
+  Inst.setOpcode(Opc);
+  
+  if (!isMR)
+    Inst.addOperand(MCOperand::CreateReg(RegNo));
+  
+  // Add the mem operand.
+  Inst.addOperand(MCOperand::CreateReg(0));  // Segment
+  Inst.addOperand(MCOperand::CreateImm(1));  // Scale
+  Inst.addOperand(MCOperand::CreateReg(0));  // IndexReg
+  Inst.addOperand(Disp);                     // Displacement
+  Inst.addOperand(MCOperand::CreateReg(0));  // BaseReg
+ 
+  if (isMR)
+    Inst.addOperand(MCOperand::CreateReg(RegNo));
+}
+
 // FIXME: Custom X86 cleanup function to implement a temporary hack to handle
 // matching INCL/DECL correctly for x86_64. This needs to be replaced by a
 // proper mechanism for supporting (ambiguous) feature dependent instructions.
@@ -637,6 +792,14 @@ void X86ATTAsmParser::InstructionCleanup(MCInst &Inst) {
   case X86::INC16m: Inst.setOpcode(X86::INC64_16m); break;
   case X86::INC32r: Inst.setOpcode(X86::INC64_32r); break;
   case X86::INC32m: Inst.setOpcode(X86::INC64_32m); break;
+      
+  // moffset instructions are x86-32 only.
+  case X86::MOV8o8a:   LowerMOffset(Inst, X86::MOV8rm , X86::AL , false); break;
+  case X86::MOV16o16a: LowerMOffset(Inst, X86::MOV16rm, X86::AX , false); break;
+  case X86::MOV32o32a: LowerMOffset(Inst, X86::MOV32rm, X86::EAX, false); break;
+  case X86::MOV8ao8:   LowerMOffset(Inst, X86::MOV8mr , X86::AL , true); break;
+  case X86::MOV16ao16: LowerMOffset(Inst, X86::MOV16mr, X86::AX , true); break;
+  case X86::MOV32ao32: LowerMOffset(Inst, X86::MOV32mr, X86::EAX, true); break;
   }
 }
 
@@ -673,6 +836,8 @@ X86ATTAsmParser::MatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*>
   bool MatchW = MatchInstructionImpl(Operands, Inst);
   Tmp[Base.size()] = 'l';
   bool MatchL = MatchInstructionImpl(Operands, Inst);
+  Tmp[Base.size()] = 'q';
+  bool MatchQ = MatchInstructionImpl(Operands, Inst);
 
   // Restore the old token.
   Op->setTokenValue(Base);
@@ -680,7 +845,7 @@ X86ATTAsmParser::MatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*>
   // If exactly one matched, then we treat that as a successful match (and the
   // instruction will already have been filled in correctly, since the failing
   // matches won't have modified it).
-  if (MatchB + MatchW + MatchL == 2)
+  if (MatchB + MatchW + MatchL + MatchQ == 3)
     return false;
 
   // Otherwise, the match failed.
diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
index 8b0ed1c..183213d 100644
--- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
+++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
@@ -58,12 +58,11 @@ bool X86AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
   SetupMachineFunction(MF);
 
   if (Subtarget->isTargetCOFF()) {
-    const Function *F = MF.getFunction();
-    OutStreamer.EmitRawText("\t.def\t " + Twine(CurrentFnSym->getName()) +
-                            ";\t.scl\t" +
-                Twine(F->hasInternalLinkage() ? COFF::C_STAT : COFF::C_EXT) +
-                            ";\t.type\t" + Twine(COFF::DT_FCN << COFF::N_BTSHFT)
-                            + ";\t.endef");
+    bool Intrn = MF.getFunction()->hasInternalLinkage();
+    OutStreamer.BeginCOFFSymbolDef(CurrentFnSym);
+    OutStreamer.EmitCOFFSymbolStorageClass(Intrn ? COFF::C_STAT : COFF::C_EXT);
+    OutStreamer.EmitCOFFSymbolType(COFF::DT_FCN << COFF::N_BTSHFT);
+    OutStreamer.EndCOFFSymbolDef();
   }
 
   // Have common code print out the function header with linkage info etc.
@@ -571,44 +570,55 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
       MMI->getObjFileInfo<X86COFFMachineModuleInfo>();
 
     // Emit type information for external functions
-    for (X86COFFMachineModuleInfo::stub_iterator I = COFFMMI.stub_begin(),
-           E = COFFMMI.stub_end(); I != E; ++I) {
-      OutStreamer.EmitRawText("\t.def\t " + Twine(I->getKeyData()) +
-                              ";\t.scl\t" + Twine(COFF::C_EXT) +
-                              ";\t.type\t" +
-                              Twine(COFF::DT_FCN << COFF::N_BTSHFT) +
-                              ";\t.endef");
+    typedef X86COFFMachineModuleInfo::externals_iterator externals_iterator;
+    for (externals_iterator I = COFFMMI.externals_begin(),
+                            E = COFFMMI.externals_end();
+                            I != E; ++I) {
+      OutStreamer.BeginCOFFSymbolDef(CurrentFnSym);
+      OutStreamer.EmitCOFFSymbolStorageClass(COFF::C_EXT);
+      OutStreamer.EmitCOFFSymbolType(COFF::DT_FCN << COFF::N_BTSHFT);
+      OutStreamer.EndCOFFSymbolDef();
     }
 
-    if (Subtarget->isTargetCygMing()) {
-      // Necessary for dllexport support
-      std::vector<const MCSymbol*> DLLExportedFns, DLLExportedGlobals;
-
-      const TargetLoweringObjectFileCOFF &TLOFCOFF =
-        static_cast<const TargetLoweringObjectFileCOFF&>(getObjFileLowering());
-
-      for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I)
-        if (I->hasDLLExportLinkage())
-          DLLExportedFns.push_back(Mang->getSymbol(I));
-
-      for (Module::const_global_iterator I = M.global_begin(),
-             E = M.global_end(); I != E; ++I)
-        if (I->hasDLLExportLinkage())
-          DLLExportedGlobals.push_back(Mang->getSymbol(I));
-
-      // Output linker support code for dllexported globals on windows.
-      if (!DLLExportedGlobals.empty() || !DLLExportedFns.empty()) {
-        OutStreamer.SwitchSection(TLOFCOFF.getCOFFSection(".section .drectve",
-                                                          true,
-                                                   SectionKind::getMetadata()));
-        for (unsigned i = 0, e = DLLExportedGlobals.size(); i != e; ++i)
-          OutStreamer.EmitRawText("\t.ascii \" -export:" +
-                                  Twine(DLLExportedGlobals[i]->getName()) +
-                                  ",data\"");
-
-        for (unsigned i = 0, e = DLLExportedFns.size(); i != e; ++i)
-          OutStreamer.EmitRawText("\t.ascii \" -export:" +
-                                  Twine(DLLExportedFns[i]->getName()) + "\"");
+    // Necessary for dllexport support
+    std::vector<const MCSymbol*> DLLExportedFns, DLLExportedGlobals;
+
+    const TargetLoweringObjectFileCOFF &TLOFCOFF =
+      static_cast<const TargetLoweringObjectFileCOFF&>(getObjFileLowering());
+
+    for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I)
+      if (I->hasDLLExportLinkage())
+        DLLExportedFns.push_back(Mang->getSymbol(I));
+
+    for (Module::const_global_iterator I = M.global_begin(),
+           E = M.global_end(); I != E; ++I)
+      if (I->hasDLLExportLinkage())
+        DLLExportedGlobals.push_back(Mang->getSymbol(I));
+
+    // Output linker support code for dllexported globals on windows.
+    if (!DLLExportedGlobals.empty() || !DLLExportedFns.empty()) {
+      OutStreamer.SwitchSection(TLOFCOFF.getDrectveSection());
+      SmallString<128> name;
+      for (unsigned i = 0, e = DLLExportedGlobals.size(); i != e; ++i) {
+        if (Subtarget->isTargetWindows())
+          name = " /EXPORT:";
+        else
+          name = " -export:";
+        name += DLLExportedGlobals[i]->getName();
+        if (Subtarget->isTargetWindows())
+          name += ",DATA";
+        else
+        name += ",data";
+        OutStreamer.EmitBytes(name, 0);
+      }
+
+      for (unsigned i = 0, e = DLLExportedFns.size(); i != e; ++i) {
+        if (Subtarget->isTargetWindows())
+          name = " /EXPORT:";
+        else
+          name = " -export:";
+        name += DLLExportedFns[i]->getName();
+        OutStreamer.EmitBytes(name, 0);
       }
     }
   }
diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.h b/lib/Target/X86/AsmPrinter/X86AsmPrinter.h
index 95984b2..b5a7f8d 100644
--- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.h
+++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.h
@@ -31,7 +31,7 @@ class MCInst;
 class MCStreamer;
 class MCSymbol;
 
-class VISIBILITY_HIDDEN X86AsmPrinter : public AsmPrinter {
+class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter {
   const X86Subtarget *Subtarget;
  public:
   explicit X86AsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
diff --git a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
index effc8ed..4edeca9 100644
--- a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
+++ b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
@@ -224,6 +224,60 @@ static void LowerUnaryToTwoAddr(MCInst &OutMI, unsigned NewOpc) {
   OutMI.addOperand(OutMI.getOperand(0));
 }
 
+/// \brief Simplify FOO $imm, %{al,ax,eax,rax} to FOO $imm, for instruction with
+/// a short fixed-register form.
+static void SimplifyShortImmForm(MCInst &Inst, unsigned Opcode) {
+  unsigned ImmOp = Inst.getNumOperands() - 1;
+  assert(Inst.getOperand(0).isReg() && Inst.getOperand(ImmOp).isImm() &&
+         ((Inst.getNumOperands() == 3 && Inst.getOperand(1).isReg() &&
+           Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg()) ||
+          Inst.getNumOperands() == 2) && "Unexpected instruction!");
+
+  // Check whether the destination register can be fixed.
+  unsigned Reg = Inst.getOperand(0).getReg();
+  if (Reg != X86::AL && Reg != X86::AX && Reg != X86::EAX && Reg != X86::RAX)
+    return;
+
+  // If so, rewrite the instruction.
+  MCOperand Saved = Inst.getOperand(ImmOp);
+  Inst = MCInst();
+  Inst.setOpcode(Opcode);
+  Inst.addOperand(Saved);
+}
+
+/// \brief Simplify things like MOV32rm to MOV32o32a.
+static void SimplifyShortMoveForm(MCInst &Inst, unsigned Opcode) {
+  bool IsStore = Inst.getOperand(0).isReg() && Inst.getOperand(1).isReg();
+  unsigned AddrBase = IsStore;
+  unsigned RegOp = IsStore ? 0 : 5;
+  unsigned AddrOp = AddrBase + 3;
+  assert(Inst.getNumOperands() == 6 && Inst.getOperand(RegOp).isReg() &&
+         Inst.getOperand(AddrBase + 0).isReg() && // base
+         Inst.getOperand(AddrBase + 1).isImm() && // scale
+         Inst.getOperand(AddrBase + 2).isReg() && // index register
+         (Inst.getOperand(AddrOp).isExpr() ||     // address
+          Inst.getOperand(AddrOp).isImm())&&
+         Inst.getOperand(AddrBase + 4).isReg() && // segment
+         "Unexpected instruction!");
+
+  // Check whether the destination register can be fixed.
+  unsigned Reg = Inst.getOperand(RegOp).getReg();
+  if (Reg != X86::AL && Reg != X86::AX && Reg != X86::EAX && Reg != X86::RAX)
+    return;
+
+  // Check whether this is an absolute address.
+  if (Inst.getOperand(AddrBase + 0).getReg() != 0 ||
+      Inst.getOperand(AddrBase + 2).getReg() != 0 ||
+      Inst.getOperand(AddrBase + 4).getReg() != 0 ||
+      Inst.getOperand(AddrBase + 1).getImm() != 1)
+    return;
+
+  // If so, rewrite the instruction.
+  MCOperand Saved = Inst.getOperand(AddrOp);
+  Inst = MCInst();
+  Inst.setOpcode(Opcode);
+  Inst.addOperand(Saved);
+}
 
 void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
   OutMI.setOpcode(MI->getOpcode());
@@ -309,8 +363,32 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
     LowerSubReg32_Op0(OutMI, X86::MOV32r0);   // MOV64r0 -> MOV32r0
     LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); // MOV32r0 -> XOR32rr
     break;
-      
-      
+
+  // TAILJMPr, TAILJMPr64, CALL64r, CALL64pcrel32 - These instructions have
+  // register inputs modeled as normal uses instead of implicit uses.  As such,
+  // truncate off all but the first operand (the callee).  FIXME: Change isel.
+  case X86::TAILJMPr:
+  case X86::TAILJMPr64:
+  case X86::CALL64r:
+  case X86::CALL64pcrel32: {
+    unsigned Opcode = OutMI.getOpcode();
+    MCOperand Saved = OutMI.getOperand(0);
+    OutMI = MCInst();
+    OutMI.setOpcode(Opcode);
+    OutMI.addOperand(Saved);
+    break;
+  }
+
+  // TAILJMPd, TAILJMPd64 - Lower to the correct jump instructions.
+  case X86::TAILJMPd:
+  case X86::TAILJMPd64: {
+    MCOperand Saved = OutMI.getOperand(0);
+    OutMI = MCInst();
+    OutMI.setOpcode(X86::TAILJMP_1);
+    OutMI.addOperand(Saved);
+    break;
+  }
+
   // The assembler backend wants to see branches in their small form and relax
   // them to their large form.  The JIT can only handle the large form because
   // it does not do relaxation.  For now, translate the large form to the
@@ -332,6 +410,61 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
   case X86::JGE_4: OutMI.setOpcode(X86::JGE_1); break;
   case X86::JLE_4: OutMI.setOpcode(X86::JLE_1); break;
   case X86::JG_4:  OutMI.setOpcode(X86::JG_1); break;
+
+  // We don't currently select the correct instruction form for instructions
+  // which have a short %eax, etc. form. Handle this by custom lowering, for
+  // now.
+  //
+  // Note, we are currently not handling the following instructions:
+  // MOV64ao8, MOV64o8a
+  // XCHG16ar, XCHG32ar, XCHG64ar
+  case X86::MOV8mr_NOREX:
+  case X86::MOV8mr:     SimplifyShortMoveForm(OutMI, X86::MOV8ao8); break;
+  case X86::MOV8rm_NOREX:
+  case X86::MOV8rm:     SimplifyShortMoveForm(OutMI, X86::MOV8o8a); break;
+  case X86::MOV16mr:    SimplifyShortMoveForm(OutMI, X86::MOV16ao16); break;
+  case X86::MOV16rm:    SimplifyShortMoveForm(OutMI, X86::MOV16o16a); break;
+  case X86::MOV32mr:    SimplifyShortMoveForm(OutMI, X86::MOV32ao32); break;
+  case X86::MOV32rm:    SimplifyShortMoveForm(OutMI, X86::MOV32o32a); break;
+  case X86::MOV64mr:    SimplifyShortMoveForm(OutMI, X86::MOV64ao64); break;
+  case X86::MOV64rm:    SimplifyShortMoveForm(OutMI, X86::MOV64o64a); break;
+
+  case X86::ADC8ri:     SimplifyShortImmForm(OutMI, X86::ADC8i8);    break;
+  case X86::ADC16ri:    SimplifyShortImmForm(OutMI, X86::ADC16i16);  break;
+  case X86::ADC32ri:    SimplifyShortImmForm(OutMI, X86::ADC32i32);  break;
+  case X86::ADC64ri32:  SimplifyShortImmForm(OutMI, X86::ADC64i32);  break;
+  case X86::ADD8ri:     SimplifyShortImmForm(OutMI, X86::ADD8i8);    break;
+  case X86::ADD16ri:    SimplifyShortImmForm(OutMI, X86::ADD16i16);  break;
+  case X86::ADD32ri:    SimplifyShortImmForm(OutMI, X86::ADD32i32);  break;
+  case X86::ADD64ri32:  SimplifyShortImmForm(OutMI, X86::ADD64i32);  break;
+  case X86::AND8ri:     SimplifyShortImmForm(OutMI, X86::AND8i8);    break;
+  case X86::AND16ri:    SimplifyShortImmForm(OutMI, X86::AND16i16);  break;
+  case X86::AND32ri:    SimplifyShortImmForm(OutMI, X86::AND32i32);  break;
+  case X86::AND64ri32:  SimplifyShortImmForm(OutMI, X86::AND64i32);  break;
+  case X86::CMP8ri:     SimplifyShortImmForm(OutMI, X86::CMP8i8);    break;
+  case X86::CMP16ri:    SimplifyShortImmForm(OutMI, X86::CMP16i16);  break;
+  case X86::CMP32ri:    SimplifyShortImmForm(OutMI, X86::CMP32i32);  break;
+  case X86::CMP64ri32:  SimplifyShortImmForm(OutMI, X86::CMP64i32);  break;
+  case X86::OR8ri:      SimplifyShortImmForm(OutMI, X86::OR8i8);     break;
+  case X86::OR16ri:     SimplifyShortImmForm(OutMI, X86::OR16i16);   break;
+  case X86::OR32ri:     SimplifyShortImmForm(OutMI, X86::OR32i32);   break;
+  case X86::OR64ri32:   SimplifyShortImmForm(OutMI, X86::OR64i32);   break;
+  case X86::SBB8ri:     SimplifyShortImmForm(OutMI, X86::SBB8i8);    break;
+  case X86::SBB16ri:    SimplifyShortImmForm(OutMI, X86::SBB16i16);  break;
+  case X86::SBB32ri:    SimplifyShortImmForm(OutMI, X86::SBB32i32);  break;
+  case X86::SBB64ri32:  SimplifyShortImmForm(OutMI, X86::SBB64i32);  break;
+  case X86::SUB8ri:     SimplifyShortImmForm(OutMI, X86::SUB8i8);    break;
+  case X86::SUB16ri:    SimplifyShortImmForm(OutMI, X86::SUB16i16);  break;
+  case X86::SUB32ri:    SimplifyShortImmForm(OutMI, X86::SUB32i32);  break;
+  case X86::SUB64ri32:  SimplifyShortImmForm(OutMI, X86::SUB64i32);  break;
+  case X86::TEST8ri:    SimplifyShortImmForm(OutMI, X86::TEST8i8);   break;
+  case X86::TEST16ri:   SimplifyShortImmForm(OutMI, X86::TEST16i16); break;
+  case X86::TEST32ri:   SimplifyShortImmForm(OutMI, X86::TEST32i32); break;
+  case X86::TEST64ri32: SimplifyShortImmForm(OutMI, X86::TEST64i32); break;
+  case X86::XOR8ri:     SimplifyShortImmForm(OutMI, X86::XOR8i8);    break;
+  case X86::XOR16ri:    SimplifyShortImmForm(OutMI, X86::XOR16i16);  break;
+  case X86::XOR32ri:    SimplifyShortImmForm(OutMI, X86::XOR32i32);  break;
+  case X86::XOR64ri32:  SimplifyShortImmForm(OutMI, X86::XOR64i32);  break;
   }
 }
 
@@ -346,7 +479,7 @@ void X86AsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
   // cast away const; DIetc do not take const operands for some reason.
   DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps-1).getMetadata()));
   if (V.getContext().isSubprogram())
-    O << DISubprogram(V.getContext().getNode()).getDisplayName() << ":";
+    O << DISubprogram(V.getContext()).getDisplayName() << ":";
   O << V.getName();
   O << " <- ";
   // Frame address.  Currently handles register +- offset only.
diff --git a/lib/Target/X86/AsmPrinter/X86MCInstLower.h b/lib/Target/X86/AsmPrinter/X86MCInstLower.h
index ebd23f6..9e5474f 100644
--- a/lib/Target/X86/AsmPrinter/X86MCInstLower.h
+++ b/lib/Target/X86/AsmPrinter/X86MCInstLower.h
@@ -25,7 +25,7 @@ namespace llvm {
   class X86Subtarget;
   
 /// X86MCInstLower - This class is used to lower an MachineInstr into an MCInst.
-class VISIBILITY_HIDDEN X86MCInstLower {
+class LLVM_LIBRARY_VISIBILITY X86MCInstLower {
   MCContext &Ctx;
   Mangler *Mang;
   X86AsmPrinter &AsmPrinter;
diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt
index da6bb91..1334820 100644
--- a/lib/Target/X86/CMakeLists.txt
+++ b/lib/Target/X86/CMakeLists.txt
@@ -39,7 +39,12 @@ set(sources
 
 if( CMAKE_CL_64 )
   enable_language(ASM_MASM)
-  set(sources ${sources} X86CompilationCallback_Win64.asm)
+  ADD_CUSTOM_COMMAND(
+    OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/X86CompilationCallback_Win64.obj
+    COMMAND ${CMAKE_ASM_MASM_COMPILER} /Fo ${CMAKE_CURRENT_BINARY_DIR}/X86CompilationCallback_Win64.obj /c ${CMAKE_CURRENT_SOURCE_DIR}/X86CompilationCallback_Win64.asm
+    DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/X86CompilationCallback_Win64.asm
+   )
+   set(sources ${sources} ${CMAKE_CURRENT_BINARY_DIR}/X86CompilationCallback_Win64.obj)
 endif()
 
 add_llvm_target(X86CodeGen ${sources})
diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp
index 62e7357..8a5a630 100644
--- a/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -155,7 +155,57 @@ static void translateRegister(MCInst &mcInst, Reg reg) {
 ///
 /// @param mcInst       - The MCInst to append to.
 /// @param immediate    - The immediate value to append.
-static void translateImmediate(MCInst &mcInst, uint64_t immediate) {
+/// @param operand      - The operand, as stored in the descriptor table.
+/// @param insn         - The internal instruction.
+static void translateImmediate(MCInst &mcInst, 
+                               uint64_t immediate, 
+                               OperandSpecifier &operand,
+                               InternalInstruction &insn) {
+  // Sign-extend the immediate if necessary.
+
+  OperandType type = operand.type;
+
+  if (type == TYPE_RELv) {
+    switch (insn.displacementSize) {
+    default:
+      break;
+    case 8:
+      type = TYPE_MOFFS8;
+      break;
+    case 16:
+      type = TYPE_MOFFS16;
+      break;
+    case 32:
+      type = TYPE_MOFFS32;
+      break;
+    case 64:
+      type = TYPE_MOFFS64;
+      break;
+    }
+  }
+
+  switch (type) {
+  case TYPE_MOFFS8:
+  case TYPE_REL8:
+    if(immediate & 0x80)
+      immediate |= ~(0xffull);
+    break;
+  case TYPE_MOFFS16:
+    if(immediate & 0x8000)
+      immediate |= ~(0xffffull);
+    break;
+  case TYPE_MOFFS32:
+  case TYPE_REL32:
+  case TYPE_REL64:
+    if(immediate & 0x80000000)
+      immediate |= ~(0xffffffffull);
+    break;
+  case TYPE_MOFFS64:
+  default:
+    // operand is 64 bits wide.  Do nothing.
+    break;
+  }
+    
   mcInst.addOperand(MCOperand::CreateImm(immediate));
 }
 
@@ -370,8 +420,7 @@ static bool translateRM(MCInst &mcInst,
   case TYPE_XMM64:
   case TYPE_XMM128:
   case TYPE_DEBUGREG:
-  case TYPE_CR32:
-  case TYPE_CR64:
+  case TYPE_CONTROLREG:
     return translateRMRegister(mcInst, insn);
   case TYPE_M:
   case TYPE_M8:
@@ -447,8 +496,10 @@ static bool translateOperand(MCInst &mcInst,
   case ENCODING_IO:
   case ENCODING_Iv:
   case ENCODING_Ia:
-    translateImmediate(mcInst, 
-                       insn.immediates[insn.numImmediatesTranslated++]);
+    translateImmediate(mcInst,
+                       insn.immediates[insn.numImmediatesTranslated++],
+                       operand,
+                       insn);
     return false;
   case ENCODING_RB:
   case ENCODING_RW:
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
index 64f6b2d..6c3ff6b 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
@@ -1034,14 +1034,10 @@ static int readModRM(struct InternalInstruction* insn) {
       if (index > 7)                                      \
         *valid = 0;                                       \
       return prefix##_DR0 + index;                        \
-    case TYPE_CR32:                                       \
-      if (index > 7)                                      \
-        *valid = 0;                                       \
-      return prefix##_ECR0 + index;                       \
-    case TYPE_CR64:                                       \
+    case TYPE_CONTROLREG:                                 \
       if (index > 8)                                      \
         *valid = 0;                                       \
-      return prefix##_RCR0 + index;                       \
+      return prefix##_CR0 + index;                        \
     }                                                     \
   }
 
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
index 462cf68..28ba86b 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
@@ -225,26 +225,16 @@ extern "C" {
   ENTRY(DR6)        \
   ENTRY(DR7)
 
-#define REGS_CONTROL_32BIT  \
-  ENTRY(ECR0)               \
-  ENTRY(ECR1)               \
-  ENTRY(ECR2)               \
-  ENTRY(ECR3)               \
-  ENTRY(ECR4)               \
-  ENTRY(ECR5)               \
-  ENTRY(ECR6)               \
-  ENTRY(ECR7)
-
-#define REGS_CONTROL_64BIT  \
-  ENTRY(RCR0)               \
-  ENTRY(RCR1)               \
-  ENTRY(RCR2)               \
-  ENTRY(RCR3)               \
-  ENTRY(RCR4)               \
-  ENTRY(RCR5)               \
-  ENTRY(RCR6)               \
-  ENTRY(RCR7)               \
-  ENTRY(RCR8)
+#define REGS_CONTROL  \
+  ENTRY(CR0)          \
+  ENTRY(CR1)          \
+  ENTRY(CR2)          \
+  ENTRY(CR3)          \
+  ENTRY(CR4)          \
+  ENTRY(CR5)          \
+  ENTRY(CR6)          \
+  ENTRY(CR7)          \
+  ENTRY(CR8)
   
 #define ALL_EA_BASES  \
   EA_BASES_16BIT      \
@@ -264,8 +254,7 @@ extern "C" {
   REGS_XMM            \
   REGS_SEGMENT        \
   REGS_DEBUG          \
-  REGS_CONTROL_32BIT  \
-  REGS_CONTROL_64BIT  \
+  REGS_CONTROL        \
   ENTRY(RIP)
 
 /*
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
index 4a7cd57..0f33f52 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
@@ -280,8 +280,7 @@ struct ContextDecision {
   ENUM_ENTRY(TYPE_XMM0,       "Implicit use of XMM0")                          \
   ENUM_ENTRY(TYPE_SEGMENTREG, "Segment register operand")                      \
   ENUM_ENTRY(TYPE_DEBUGREG,   "Debug register operand")                        \
-  ENUM_ENTRY(TYPE_CR32,       "4-byte control register operand")               \
-  ENUM_ENTRY(TYPE_CR64,       "8-byte")                                        \
+  ENUM_ENTRY(TYPE_CONTROLREG, "Control register operand")                      \
                                                                                \
   ENUM_ENTRY(TYPE_Mv,         "Memory operand of operand size")                \
   ENUM_ENTRY(TYPE_Rv,         "Register operand of operand size")              \
diff --git a/lib/Target/X86/SSEDomainFix.cpp b/lib/Target/X86/SSEDomainFix.cpp
index 5e80845..dab070e 100644
--- a/lib/Target/X86/SSEDomainFix.cpp
+++ b/lib/Target/X86/SSEDomainFix.cpp
@@ -155,9 +155,7 @@ char SSEDomainFixPass::ID = 0;
 /// Translate TRI register number to an index into our smaller tables of
 /// interesting registers. Return -1 for boring registers.
 int SSEDomainFixPass::RegIndex(unsigned reg) {
-  // Registers are sorted lexicographically.
-  // We just need them to be consecutive, ordering doesn't matter.
-  assert(X86::XMM9 == X86::XMM0+NumRegs-1 && "Unexpected sort");
+  assert(X86::XMM15 == X86::XMM0+NumRegs-1 && "Unexpected sort");
   reg -= X86::XMM0;
   return reg < NumRegs ? (int) reg : -1;
 }
diff --git a/lib/Target/X86/X86AsmBackend.cpp b/lib/Target/X86/X86AsmBackend.cpp
index ba9c1d0..151087f 100644
--- a/lib/Target/X86/X86AsmBackend.cpp
+++ b/lib/Target/X86/X86AsmBackend.cpp
@@ -12,6 +12,7 @@
 #include "X86FixupKinds.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCObjectWriter.h"
 #include "llvm/MC/MCSectionELF.h"
 #include "llvm/MC/MCSectionMachO.h"
@@ -43,20 +44,19 @@ public:
   X86AsmBackend(const Target &T)
     : TargetAsmBackend(T) {}
 
-  void ApplyFixup(const MCAsmFixup &Fixup, MCDataFragment &DF,
+  void ApplyFixup(const MCFixup &Fixup, MCDataFragment &DF,
                   uint64_t Value) const {
-    unsigned Size = 1 << getFixupKindLog2Size(Fixup.Kind);
+    unsigned Size = 1 << getFixupKindLog2Size(Fixup.getKind());
 
-    assert(Fixup.Offset + Size <= DF.getContents().size() &&
+    assert(Fixup.getOffset() + Size <= DF.getContents().size() &&
            "Invalid fixup offset!");
     for (unsigned i = 0; i != Size; ++i)
-      DF.getContents()[Fixup.Offset + i] = uint8_t(Value >> (i * 8));
+      DF.getContents()[Fixup.getOffset() + i] = uint8_t(Value >> (i * 8));
   }
 
-  bool MayNeedRelaxation(const MCInst &Inst,
-                         const SmallVectorImpl<MCAsmFixup> &Fixups) const;
+  bool MayNeedRelaxation(const MCInst &Inst) const;
 
-  void RelaxInstruction(const MCInstFragment *IF, MCInst &Res) const;
+  void RelaxInstruction(const MCInst &Inst, MCInst &Res) const;
 
   bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const;
 };
@@ -75,6 +75,7 @@ static unsigned getRelaxedOpcode(unsigned Op) {
   case X86::JG_1:  return X86::JG_4;
   case X86::JLE_1: return X86::JLE_4;
   case X86::JL_1:  return X86::JL_4;
+  case X86::TAILJMP_1:
   case X86::JMP_1: return X86::JMP_4;
   case X86::JNE_1: return X86::JNE_4;
   case X86::JNO_1: return X86::JNO_4;
@@ -86,35 +87,33 @@ static unsigned getRelaxedOpcode(unsigned Op) {
   }
 }
 
-bool X86AsmBackend::MayNeedRelaxation(const MCInst &Inst,
-                              const SmallVectorImpl<MCAsmFixup> &Fixups) const {
-  // Check for a 1byte pcrel fixup, and enforce that we would know how to relax
-  // this instruction.
-  for (unsigned i = 0, e = Fixups.size(); i != e; ++i) {
-    if (unsigned(Fixups[i].Kind) == X86::reloc_pcrel_1byte) {
-      assert(getRelaxedOpcode(Inst.getOpcode()) != Inst.getOpcode());
-      return true;
-    }
-  }
+bool X86AsmBackend::MayNeedRelaxation(const MCInst &Inst) const {
+  // Check if this instruction is ever relaxable.
+  if (getRelaxedOpcode(Inst.getOpcode()) == Inst.getOpcode())
+    return false;
 
-  return false;
+  // If so, just assume it can be relaxed. Once we support relaxing more complex
+  // instructions we should check that the instruction actually has symbolic
+  // operands before doing this, but we need to be careful about things like
+  // PCrel.
+  return true;
 }
 
 // FIXME: Can tblgen help at all here to verify there aren't other instructions
 // we can relax?
-void X86AsmBackend::RelaxInstruction(const MCInstFragment *IF,
-                                     MCInst &Res) const {
+void X86AsmBackend::RelaxInstruction(const MCInst &Inst, MCInst &Res) const {
   // The only relaxations X86 does is from a 1byte pcrel to a 4byte pcrel.
-  unsigned RelaxedOp = getRelaxedOpcode(IF->getInst().getOpcode());
+  unsigned RelaxedOp = getRelaxedOpcode(Inst.getOpcode());
 
-  if (RelaxedOp == IF->getInst().getOpcode()) {
+  if (RelaxedOp == Inst.getOpcode()) {
     SmallString<256> Tmp;
     raw_svector_ostream OS(Tmp);
-    IF->getInst().dump_pretty(OS);
+    Inst.dump_pretty(OS);
+    OS << "\n";
     report_fatal_error("unexpected instruction to relax: " + OS.str());
   }
 
-  Res = IF->getInst();
+  Res = Inst;
   Res.setOpcode(RelaxedOp);
 }
 
@@ -199,6 +198,18 @@ public:
   }
 };
 
+class ELFX86_32AsmBackend : public ELFX86AsmBackend {
+public:
+  ELFX86_32AsmBackend(const Target &T)
+    : ELFX86AsmBackend(T) {}
+};
+
+class ELFX86_64AsmBackend : public ELFX86AsmBackend {
+public:
+  ELFX86_64AsmBackend(const Target &T)
+    : ELFX86AsmBackend(T) {}
+};
+
 class DarwinX86AsmBackend : public X86AsmBackend {
 public:
   DarwinX86AsmBackend(const Target &T)
@@ -210,7 +221,8 @@ public:
   bool isVirtualSection(const MCSection &Section) const {
     const MCSectionMachO &SMO = static_cast<const MCSectionMachO&>(Section);
     return (SMO.getType() == MCSectionMachO::S_ZEROFILL ||
-            SMO.getType() == MCSectionMachO::S_GB_ZEROFILL);
+            SMO.getType() == MCSectionMachO::S_GB_ZEROFILL ||
+            SMO.getType() == MCSectionMachO::S_THREAD_LOCAL_ZEROFILL);
   }
 };
 
@@ -247,6 +259,26 @@ public:
     const MCSectionMachO &SMO = static_cast<const MCSectionMachO&>(Section);
     return SMO.getType() == MCSectionMachO::S_CSTRING_LITERALS;
   }
+
+  virtual bool isSectionAtomizable(const MCSection &Section) const {
+    const MCSectionMachO &SMO = static_cast<const MCSectionMachO&>(Section);
+    // Fixed sized data sections are uniqued, they cannot be diced into atoms.
+    switch (SMO.getType()) {
+    default:
+      return true;
+
+    case MCSectionMachO::S_4BYTE_LITERALS:
+    case MCSectionMachO::S_8BYTE_LITERALS:
+    case MCSectionMachO::S_16BYTE_LITERALS:
+    case MCSectionMachO::S_LITERAL_POINTERS:
+    case MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS:
+    case MCSectionMachO::S_LAZY_SYMBOL_POINTERS:
+    case MCSectionMachO::S_MOD_INIT_FUNC_POINTERS:
+    case MCSectionMachO::S_MOD_TERM_FUNC_POINTERS:
+    case MCSectionMachO::S_INTERPOSING:
+      return false;
+    }
+  }
 };
 
 }
@@ -257,7 +289,7 @@ TargetAsmBackend *llvm::createX86_32AsmBackend(const Target &T,
   case Triple::Darwin:
     return new DarwinX86_32AsmBackend(T);
   default:
-    return new ELFX86AsmBackend(T);
+    return new ELFX86_32AsmBackend(T);
   }
 }
 
@@ -267,6 +299,6 @@ TargetAsmBackend *llvm::createX86_64AsmBackend(const Target &T,
   case Triple::Darwin:
     return new DarwinX86_64AsmBackend(T);
   default:
-    return new ELFX86AsmBackend(T);
+    return new ELFX86_64AsmBackend(T);
   }
 }
diff --git a/lib/Target/X86/X86COFFMachineModuleInfo.h b/lib/Target/X86/X86COFFMachineModuleInfo.h
index eece462..98ab2a6 100644
--- a/lib/Target/X86/X86COFFMachineModuleInfo.h
+++ b/lib/Target/X86/X86COFFMachineModuleInfo.h
@@ -15,7 +15,7 @@
 #define X86COFF_MACHINEMODULEINFO_H
 
 #include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/ADT/StringSet.h"
+#include "llvm/ADT/DenseSet.h"
 #include "X86MachineFunctionInfo.h"
 
 namespace llvm {
@@ -25,18 +25,18 @@ namespace llvm {
 /// X86COFFMachineModuleInfo - This is a MachineModuleInfoImpl implementation
 /// for X86 COFF targets.
 class X86COFFMachineModuleInfo : public MachineModuleInfoImpl {
-  StringSet<> CygMingStubs;
+  DenseSet<MCSymbol const *> Externals;
 public:
   X86COFFMachineModuleInfo(const MachineModuleInfo &) {}
   virtual ~X86COFFMachineModuleInfo();
 
-  void addExternalFunction(StringRef Name) {
-    CygMingStubs.insert(Name);
+  void addExternalFunction(MCSymbol* Symbol) {
+    Externals.insert(Symbol);
   }
     
-  typedef StringSet<>::const_iterator stub_iterator;
-  stub_iterator stub_begin() const { return CygMingStubs.begin(); }
-  stub_iterator stub_end() const { return CygMingStubs.end(); }
+  typedef DenseSet<MCSymbol const *>::const_iterator externals_iterator;
+  externals_iterator externals_begin() const { return Externals.begin(); }
+  externals_iterator externals_end() const { return Externals.end(); }
 };
 
 
diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td
index fd15efd..a5774e1 100644
--- a/lib/Target/X86/X86CallingConv.td
+++ b/lib/Target/X86/X86CallingConv.td
@@ -307,6 +307,20 @@ def CC_X86_32_FastCall : CallingConv<[
   CCDelegateTo<CC_X86_32_Common>
 ]>;
 
+def CC_X86_32_ThisCall : CallingConv<[
+  // Promote i8/i16 arguments to i32.
+  CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+  // The 'nest' parameter, if any, is passed in EAX.
+  CCIfNest<CCAssignToReg<[EAX]>>,
+
+  // The first integer argument is passed in ECX
+  CCIfType<[i32], CCAssignToReg<[ECX]>>,
+
+  // Otherwise, same as everything else.
+  CCDelegateTo<CC_X86_32_Common>
+]>;
+
 def CC_X86_32_FastCC : CallingConv<[
   // Handles byval parameters.  Note that we can't rely on the delegation
   // to CC_X86_32_Common for this because that happens after code that
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index ff9208c..1bc5eb7 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -180,6 +180,8 @@ CCAssignFn *X86FastISel::CCAssignFnForCall(CallingConv::ID CC,
 
   if (CC == CallingConv::X86_FastCall)
     return CC_X86_32_FastCall;
+  else if (CC == CallingConv::X86_ThisCall)
+    return CC_X86_32_ThisCall;
   else if (CC == CallingConv::Fast)
     return CC_X86_32_FastCC;
   else if (CC == CallingConv::GHC)
@@ -324,7 +326,8 @@ bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
 bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT,
                                     unsigned Src, EVT SrcVT,
                                     unsigned &ResultReg) {
-  unsigned RR = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc, Src);
+  unsigned RR = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc,
+                           Src, /*TODO: Kill=*/false);
   
   if (RR != 0) {
     ResultReg = RR;
@@ -416,7 +419,7 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
                    (S == 1 || S == 2 || S == 4 || S == 8)) {
           // Scaled-index addressing.
           Scale = S;
-          IndexReg = getRegForGEPIndex(Op);
+          IndexReg = getRegForGEPIndex(Op).first;
           if (IndexReg == 0)
             return false;
         } else
@@ -802,7 +805,7 @@ bool X86FastISel::X86SelectZExt(const Instruction *I) {
     unsigned ResultReg = getRegForValue(I->getOperand(0));
     if (ResultReg == 0) return false;
     // Set the high bits to zero.
-    ResultReg = FastEmitZExtFromI1(MVT::i8, ResultReg);
+    ResultReg = FastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false);
     if (ResultReg == 0) return false;
     UpdateValueMap(I, ResultReg);
     return true;
@@ -913,7 +916,7 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
                RI = MBB->rbegin(), RE = MBB->rend(); RI != RE; ++RI) {
           const MachineInstr &MI = *RI;
 
-          if (MI.modifiesRegister(Reg)) {
+          if (MI.definesRegister(Reg)) {
             unsigned Src, Dst, SrcSR, DstSR;
 
             if (getInstrInfo()->isMoveInstr(MI, Src, Dst, SrcSR, DstSR)) {
@@ -1019,14 +1022,14 @@ bool X86FastISel::X86SelectShift(const Instruction *I) {
   
   unsigned Op1Reg = getRegForValue(I->getOperand(1));
   if (Op1Reg == 0) return false;
-  TII.copyRegToReg(*MBB, MBB->end(), CReg, Op1Reg, RC, RC);
+  TII.copyRegToReg(*MBB, MBB->end(), CReg, Op1Reg, RC, RC, DL);
 
   // The shift instruction uses X86::CL. If we defined a super-register
   // of X86::CL, emit an EXTRACT_SUBREG to precisely describe what
   // we're doing here.
   if (CReg != X86::CL)
     BuildMI(MBB, DL, TII.get(TargetOpcode::EXTRACT_SUBREG), X86::CL)
-      .addReg(CReg).addImm(X86::SUBREG_8BIT);
+      .addReg(CReg).addImm(X86::sub_8bit);
 
   unsigned ResultReg = createResultReg(RC);
   BuildMI(MBB, DL, TII.get(OpReg), ResultReg).addReg(Op0Reg);
@@ -1133,7 +1136,8 @@ bool X86FastISel::X86SelectTrunc(const Instruction *I) {
 
   // Then issue an extract_subreg.
   unsigned ResultReg = FastEmitInst_extractsubreg(MVT::i8,
-                                                  CopyReg, X86::SUBREG_8BIT);
+                                                  CopyReg, /*Kill=*/true,
+                                                  X86::sub_8bit);
   if (!ResultReg)
     return false;
 
@@ -1436,7 +1440,7 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
     }
     case CCValAssign::BCvt: {
       unsigned BC = FastEmit_r(ArgVT.getSimpleVT(), VA.getLocVT().getSimpleVT(),
-                               ISD::BIT_CONVERT, Arg);
+                               ISD::BIT_CONVERT, Arg, /*TODO: Kill=*/false);
       assert(BC != 0 && "Failed to emit a bitcast!");
       Arg = BC;
       ArgVT = VA.getLocVT();
@@ -1447,7 +1451,7 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
     if (VA.isRegLoc()) {
       TargetRegisterClass* RC = TLI.getRegClassFor(ArgVT);
       bool Emitted = TII.copyRegToReg(*MBB, MBB->end(), VA.getLocReg(),
-                                      Arg, RC, RC);
+                                      Arg, RC, RC, DL);
       assert(Emitted && "Failed to emit a copy instruction!"); Emitted=Emitted;
       Emitted = true;
       RegArgs.push_back(VA.getLocReg());
@@ -1473,7 +1477,8 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
   if (Subtarget->isPICStyleGOT()) {
     TargetRegisterClass *RC = X86::GR32RegisterClass;
     unsigned Base = getInstrInfo()->getGlobalBaseReg(&MF);
-    bool Emitted = TII.copyRegToReg(*MBB, MBB->end(), X86::EBX, Base, RC, RC);
+    bool Emitted = TII.copyRegToReg(*MBB, MBB->end(), X86::EBX, Base, RC, RC,
+                                    DL);
     assert(Emitted && "Failed to emit a copy instruction!"); Emitted=Emitted;
     Emitted = true;
   }
@@ -1552,7 +1557,7 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
 
     unsigned ResultReg = createResultReg(DstRC);
     bool Emitted = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
-                                    RVLocs[0].getLocReg(), DstRC, SrcRC);
+                                    RVLocs[0].getLocReg(), DstRC, SrcRC, DL);
     assert(Emitted && "Failed to emit a copy instruction!"); Emitted=Emitted;
     Emitted = true;
     if (CopyVT != RVLocs[0].getValVT()) {
diff --git a/lib/Target/X86/X86FloatingPointRegKill.cpp b/lib/Target/X86/X86FloatingPointRegKill.cpp
index 541083f..747683d 100644
--- a/lib/Target/X86/X86FloatingPointRegKill.cpp
+++ b/lib/Target/X86/X86FloatingPointRegKill.cpp
@@ -14,7 +14,6 @@
 #define DEBUG_TYPE "x86-codegen"
 #include "X86.h"
 #include "X86InstrInfo.h"
-#include "X86Subtarget.h"
 #include "llvm/Instructions.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -42,12 +41,70 @@ namespace {
 
     virtual bool runOnMachineFunction(MachineFunction &MF);
 
-    virtual const char *getPassName() const { return "X86 FP_REG_KILL inserter"; }
+    virtual const char *getPassName() const {
+      return "X86 FP_REG_KILL inserter";
+    }
   };
   char FPRegKiller::ID = 0;
 }
 
-FunctionPass *llvm::createX87FPRegKillInserterPass() { return new FPRegKiller(); }
+FunctionPass *llvm::createX87FPRegKillInserterPass() {
+  return new FPRegKiller();
+}
+
+/// isFPStackVReg - Return true if the specified vreg is from a fp stack
+/// register class.
+static bool isFPStackVReg(unsigned RegNo, const MachineRegisterInfo &MRI) {
+  if (!TargetRegisterInfo::isVirtualRegister(RegNo))
+    return false;
+  
+  switch (MRI.getRegClass(RegNo)->getID()) {
+  default: return false;
+  case X86::RFP32RegClassID:
+  case X86::RFP64RegClassID:
+  case X86::RFP80RegClassID:
+  return true;
+  }
+}
+
+
+/// ContainsFPStackCode - Return true if the specific MBB has floating point
+/// stack code, and thus needs an FP_REG_KILL.
+static bool ContainsFPStackCode(MachineBasicBlock *MBB,
+                                const MachineRegisterInfo &MRI) {
+  // Scan the block, looking for instructions that define fp stack vregs.
+  for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+       I != E; ++I) {
+    if (I->getNumOperands() == 0 || !I->getOperand(0).isReg())
+      continue;
+    
+    for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) {
+      if (!I->getOperand(op).isReg() || !I->getOperand(op).isDef())
+        continue;
+      
+      if (isFPStackVReg(I->getOperand(op).getReg(), MRI))
+        return true;
+    }
+  }
+  
+  // Check PHI nodes in successor blocks.  These PHI's will be lowered to have
+  // a copy of the input value in this block, which is a definition of the
+  // value.
+  for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+       E = MBB->succ_end(); SI != E; ++ SI) {
+    MachineBasicBlock *SuccBB = *SI;
+    for (MachineBasicBlock::iterator I = SuccBB->begin(), E = SuccBB->end();
+         I != E; ++I) {
+      // All PHI nodes are at the top of the block.
+      if (!I->isPHI()) break;
+      
+      if (isFPStackVReg(I->getOperand(0).getReg(), MRI))
+        return true;
+    }
+  }
+  
+  return false;
+}                                 
 
 bool FPRegKiller::runOnMachineFunction(MachineFunction &MF) {
   // If we are emitting FP stack code, scan the basic block to determine if this
@@ -65,14 +122,13 @@ bool FPRegKiller::runOnMachineFunction(MachineFunction &MF) {
 
   // Fast-path: If nothing is using the x87 registers, we don't need to do
   // any scanning.
-  MachineRegisterInfo &MRI = MF.getRegInfo();
+  const MachineRegisterInfo &MRI = MF.getRegInfo();
   if (MRI.getRegClassVirtRegs(X86::RFP80RegisterClass).empty() &&
       MRI.getRegClassVirtRegs(X86::RFP64RegisterClass).empty() &&
       MRI.getRegClassVirtRegs(X86::RFP32RegisterClass).empty())
     return false;
 
   bool Changed = false;
-  const X86Subtarget &Subtarget = MF.getTarget().getSubtarget<X86Subtarget>();
   MachineFunction::iterator MBBI = MF.begin();
   MachineFunction::iterator EndMBB = MF.end();
   for (; MBBI != EndMBB; ++MBBI) {
@@ -87,48 +143,8 @@ bool FPRegKiller::runOnMachineFunction(MachineFunction &MF) {
         continue;
     }
     
-    bool ContainsFPCode = false;
-    for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
-         !ContainsFPCode && I != E; ++I) {
-      if (I->getNumOperands() != 0 && I->getOperand(0).isReg()) {
-        const TargetRegisterClass *clas;
-        for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) {
-          if (I->getOperand(op).isReg() && I->getOperand(op).isDef() &&
-            TargetRegisterInfo::isVirtualRegister(I->getOperand(op).getReg()) &&
-              ((clas = MRI.getRegClass(I->getOperand(op).getReg())) == 
-                 X86::RFP32RegisterClass ||
-               clas == X86::RFP64RegisterClass ||
-               clas == X86::RFP80RegisterClass)) {
-            ContainsFPCode = true;
-            break;
-          }
-        }
-      }
-    }
-    // Check PHI nodes in successor blocks.  These PHI's will be lowered to have
-    // a copy of the input value in this block.  In SSE mode, we only care about
-    // 80-bit values.
-    if (!ContainsFPCode) {
-      // Final check, check LLVM BB's that are successors to the LLVM BB
-      // corresponding to BB for FP PHI nodes.
-      const BasicBlock *LLVMBB = MBB->getBasicBlock();
-      const PHINode *PN;
-      for (succ_const_iterator SI = succ_begin(LLVMBB), E = succ_end(LLVMBB);
-           !ContainsFPCode && SI != E; ++SI) {
-        for (BasicBlock::const_iterator II = SI->begin();
-             (PN = dyn_cast<PHINode>(II)); ++II) {
-          if (PN->getType()==Type::getX86_FP80Ty(LLVMBB->getContext()) ||
-              (!Subtarget.hasSSE1() && PN->getType()->isFloatingPointTy()) ||
-              (!Subtarget.hasSSE2() &&
-                PN->getType()==Type::getDoubleTy(LLVMBB->getContext()))) {
-            ContainsFPCode = true;
-            break;
-          }
-        }
-      }
-    }
-    // Finally, if we found any FP code, emit the FP_REG_KILL instruction.
-    if (ContainsFPCode) {
+    // If we find any FP stack code, emit the FP_REG_KILL instruction.
+    if (ContainsFPStackCode(MBB, MRI)) {
       BuildMI(*MBB, MBBI->getFirstTerminator(), DebugLoc(),
               MF.getTarget().getInstrInfo()->get(X86::FP_REG_KILL));
       ++NumFPKill;
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index fd8bb1e..0f64383 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -1693,7 +1693,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
                                                 Result,
                                    CurDAG->getTargetConstant(8, MVT::i8)), 0);
         // Then truncate it down to i8.
-        Result = CurDAG->getTargetExtractSubreg(X86::SUBREG_8BIT, dl,
+        Result = CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl,
                                                 MVT::i8, Result);
       } else {
         Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
@@ -1834,7 +1834,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
                                       CurDAG->getTargetConstant(8, MVT::i8)),
                          0);
         // Then truncate it down to i8.
-        Result = CurDAG->getTargetExtractSubreg(X86::SUBREG_8BIT, dl,
+        Result = CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl,
                                                 MVT::i8, Result);
       } else {
         Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
@@ -1883,7 +1883,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
         }
 
         // Extract the l-register.
-        SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::SUBREG_8BIT, dl,
+        SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl,
                                                         MVT::i8, Reg);
 
         // Emit a testb.
@@ -1912,7 +1912,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
                                              Reg.getValueType(), Reg, RC), 0);
 
         // Extract the h-register.
-        SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::SUBREG_8BIT_HI, dl,
+        SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_8bit_hi, dl,
                                                         MVT::i8, Reg);
 
         // Emit a testb. No special NOREX tricks are needed since there's
@@ -1930,7 +1930,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
         SDValue Reg = N0.getNode()->getOperand(0);
 
         // Extract the 16-bit subregister.
-        SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::SUBREG_16BIT, dl,
+        SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_16bit, dl,
                                                         MVT::i16, Reg);
 
         // Emit a testw.
@@ -1946,7 +1946,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
         SDValue Reg = N0.getNode()->getOperand(0);
 
         // Extract the 32-bit subregister.
-        SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::SUBREG_32BIT, dl,
+        SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_32bit, dl,
                                                         MVT::i32, Reg);
 
         // Emit a testl.
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 6ce9ab7..b02c33d 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -94,7 +94,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
   // X86 is weird, it always uses i8 for shift amounts and setcc results.
   setShiftAmountType(MVT::i8);
   setBooleanContents(ZeroOrOneBooleanContent);
-  setSchedulingPreference(SchedulingForRegPressure);
+  setSchedulingPreference(Sched::RegPressure);
   setStackPointerRegisterToSaveRestore(X86StackPtr);
 
   if (Subtarget->isTargetDarwin()) {
@@ -145,13 +145,12 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
     setOperationAction(ISD::UINT_TO_FP     , MVT::i32  , Promote);
     setOperationAction(ISD::UINT_TO_FP     , MVT::i64  , Expand);
   } else if (!UseSoftFloat) {
-    if (X86ScalarSSEf64) {
-      // We have an impenetrably clever algorithm for ui64->double only.
-      setOperationAction(ISD::UINT_TO_FP   , MVT::i64  , Custom);
-    }
+    // We have an algorithm for SSE2->double, and we turn this into a
+    // 64-bit FILD followed by conditional FADD for other targets.
+    setOperationAction(ISD::UINT_TO_FP     , MVT::i64  , Custom);
     // We have an algorithm for SSE2, and we turn this into a 64-bit
     // FILD for other targets.
-    setOperationAction(ISD::UINT_TO_FP   , MVT::i32  , Custom);
+    setOperationAction(ISD::UINT_TO_FP     , MVT::i32  , Custom);
   }
 
   // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
@@ -215,9 +214,17 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
   }
 
   // TODO: when we have SSE, these could be more efficient, by using movd/movq.
-  if (!X86ScalarSSEf64) {
+  if (!X86ScalarSSEf64) { 
     setOperationAction(ISD::BIT_CONVERT      , MVT::f32  , Expand);
     setOperationAction(ISD::BIT_CONVERT      , MVT::i32  , Expand);
+    if (Subtarget->is64Bit()) {
+      setOperationAction(ISD::BIT_CONVERT    , MVT::f64  , Expand);
+      // Without SSE, i64->f64 goes through memory; i64->MMX is Legal.
+      if (Subtarget->hasMMX() && !DisableMMX)
+        setOperationAction(ISD::BIT_CONVERT    , MVT::i64  , Custom);
+      else 
+        setOperationAction(ISD::BIT_CONVERT    , MVT::i64  , Expand);
+    }
   }
 
   // Scalar integer divide and remainder are lowered to use operations that
@@ -679,6 +686,14 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
     setOperationAction(ISD::VSETCC,             MVT::v8i8, Custom);
     setOperationAction(ISD::VSETCC,             MVT::v4i16, Custom);
     setOperationAction(ISD::VSETCC,             MVT::v2i32, Custom);
+
+    if (!X86ScalarSSEf64 && Subtarget->is64Bit()) {
+      setOperationAction(ISD::BIT_CONVERT,        MVT::v8i8,  Custom);
+      setOperationAction(ISD::BIT_CONVERT,        MVT::v4i16, Custom);
+      setOperationAction(ISD::BIT_CONVERT,        MVT::v2i32, Custom);
+      setOperationAction(ISD::BIT_CONVERT,        MVT::v2f32, Custom);
+      setOperationAction(ISD::BIT_CONVERT,        MVT::v1i64, Custom);
+    }
   }
 
   if (!UseSoftFloat && Subtarget->hasSSE1()) {
@@ -1244,10 +1259,8 @@ X86TargetLowering::LowerReturn(SDValue Chain,
     MachineFunction &MF = DAG.getMachineFunction();
     X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
     unsigned Reg = FuncInfo->getSRetReturnReg();
-    if (!Reg) {
-      Reg = MRI.createVirtualRegister(getRegClassFor(MVT::i64));
-      FuncInfo->setSRetReturnReg(Reg);
-    }
+    assert(Reg && 
+           "SRetReturnReg should have been set in LowerFormalArguments().");
     SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy());
 
     Chain = DAG.getCopyToReg(Chain, dl, X86::RAX, Val, Flag);
@@ -1384,6 +1397,8 @@ bool X86TargetLowering::IsCalleePop(bool IsVarArg,
     return !Subtarget->is64Bit();
   case CallingConv::X86_FastCall:
     return !Subtarget->is64Bit();
+  case CallingConv::X86_ThisCall:
+    return !Subtarget->is64Bit();
   case CallingConv::Fast:
     return GuaranteedTailCallOpt;
   case CallingConv::GHC:
@@ -1405,6 +1420,8 @@ CCAssignFn *X86TargetLowering::CCAssignFnForNode(CallingConv::ID CC) const {
 
   if (CC == CallingConv::X86_FastCall)
     return CC_X86_32_FastCall;
+  else if (CC == CallingConv::X86_ThisCall)
+    return CC_X86_32_ThisCall;
   else if (CC == CallingConv::Fast)
     return CC_X86_32_FastCC;
   else if (CC == CallingConv::GHC)
@@ -1596,7 +1613,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
   // If the function takes variable number of arguments, make a frame index for
   // the start of the first vararg value... for expansion of llvm.va_start.
   if (isVarArg) {
-    if (Is64Bit || CallConv != CallingConv::X86_FastCall) {
+    if (Is64Bit || (CallConv != CallingConv::X86_FastCall &&
+                    CallConv != CallingConv::X86_ThisCall)) {
       FuncInfo->setVarArgsFrameIndex(MFI->CreateFixedObject(1, StackSize,
                                                             true, false));
     }
@@ -1716,7 +1734,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
   if (!Is64Bit) {
     // RegSaveFrameIndex is X86-64 only.
     FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
-    if (CallConv == CallingConv::X86_FastCall)
+    if (CallConv == CallingConv::X86_FastCall ||
+        CallConv == CallingConv::X86_ThisCall)
       // fastcc functions can't have varargs.
       FuncInfo->setVarArgsFrameIndex(0xAAAAAAA);
   }
@@ -5272,7 +5291,7 @@ GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA,
   }
 
   // TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
-  MFI->setHasCalls(true);
+  MFI->setAdjustsStack(true);
 
   SDValue Flag = Chain.getValue(1);
   return DAG.getCopyFromReg(Chain, dl, ReturnReg, PtrVT, Flag);
@@ -5462,7 +5481,7 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
 }
 
 SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain,
-                                     SDValue StackSlot,
+                                     SDValue StackSlot, 
                                      SelectionDAG &DAG) const {
   // Build the FILD
   DebugLoc dl = Op.getDebugLoc();
@@ -5636,35 +5655,72 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
   SDValue N0 = Op.getOperand(0);
   DebugLoc dl = Op.getDebugLoc();
 
-  // Now not UINT_TO_FP is legal (it's marked custom), dag combiner won't
+  // Since UINT_TO_FP is legal (it's marked custom), dag combiner won't
   // optimize it to a SINT_TO_FP when the sign bit is known zero. Perform
   // the optimization here.
   if (DAG.SignBitIsZero(N0))
     return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(), N0);
 
   EVT SrcVT = N0.getValueType();
-  if (SrcVT == MVT::i64) {
-    // We only handle SSE2 f64 target here; caller can expand the rest.
-    if (Op.getValueType() != MVT::f64 || !X86ScalarSSEf64)
-      return SDValue();
-
+  EVT DstVT = Op.getValueType();
+  if (SrcVT == MVT::i64 && DstVT == MVT::f64 && X86ScalarSSEf64)
     return LowerUINT_TO_FP_i64(Op, DAG);
-  } else if (SrcVT == MVT::i32 && X86ScalarSSEf64) {
+  else if (SrcVT == MVT::i32 && X86ScalarSSEf64)
     return LowerUINT_TO_FP_i32(Op, DAG);
-  }
-
-  assert(SrcVT == MVT::i32 && "Unknown UINT_TO_FP to lower!");
 
   // Make a 64-bit buffer, and use it to build an FILD.
   SDValue StackSlot = DAG.CreateStackTemporary(MVT::i64);
-  SDValue WordOff = DAG.getConstant(4, getPointerTy());
-  SDValue OffsetSlot = DAG.getNode(ISD::ADD, dl,
-                                   getPointerTy(), StackSlot, WordOff);
-  SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0),
+  if (SrcVT == MVT::i32) {
+    SDValue WordOff = DAG.getConstant(4, getPointerTy());
+    SDValue OffsetSlot = DAG.getNode(ISD::ADD, dl,
+                                     getPointerTy(), StackSlot, WordOff);
+    SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0),
+                                  StackSlot, NULL, 0, false, false, 0);
+    SDValue Store2 = DAG.getStore(Store1, dl, DAG.getConstant(0, MVT::i32),
+                                  OffsetSlot, NULL, 0, false, false, 0);
+    SDValue Fild = BuildFILD(Op, MVT::i64, Store2, StackSlot, DAG);
+    return Fild;
+  }
+
+  assert(SrcVT == MVT::i64 && "Unexpected type in UINT_TO_FP");
+  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0),
                                 StackSlot, NULL, 0, false, false, 0);
-  SDValue Store2 = DAG.getStore(Store1, dl, DAG.getConstant(0, MVT::i32),
-                                OffsetSlot, NULL, 0, false, false, 0);
-  return BuildFILD(Op, MVT::i64, Store2, StackSlot, DAG);
+  // For i64 source, we need to add the appropriate power of 2 if the input
+  // was negative.  This is the same as the optimization in
+  // DAGTypeLegalizer::ExpandIntOp_UNIT_TO_FP, and for it to be safe here,
+  // we must be careful to do the computation in x87 extended precision, not
+  // in SSE. (The generic code can't know it's OK to do this, or how to.)
+  SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other);
+  SDValue Ops[] = { Store, StackSlot, DAG.getValueType(MVT::i64) };
+  SDValue Fild = DAG.getNode(X86ISD::FILD, dl, Tys, Ops, 3);
+
+  APInt FF(32, 0x5F800000ULL);
+
+  // Check whether the sign bit is set.
+  SDValue SignSet = DAG.getSetCC(dl, getSetCCResultType(MVT::i64),
+                                 Op.getOperand(0), DAG.getConstant(0, MVT::i64),
+                                 ISD::SETLT);
+
+  // Build a 64 bit pair (0, FF) in the constant pool, with FF in the lo bits.
+  SDValue FudgePtr = DAG.getConstantPool(
+                             ConstantInt::get(*DAG.getContext(), FF.zext(64)),
+                                         getPointerTy());
+
+  // Get a pointer to FF if the sign bit was set, or to 0 otherwise.
+  SDValue Zero = DAG.getIntPtrConstant(0);
+  SDValue Four = DAG.getIntPtrConstant(4);
+  SDValue Offset = DAG.getNode(ISD::SELECT, dl, Zero.getValueType(), SignSet,
+                               Zero, Four);
+  FudgePtr = DAG.getNode(ISD::ADD, dl, getPointerTy(), FudgePtr, Offset);
+
+  // Load the value out, extending it from f32 to f80.
+  // FIXME: Avoid the extend by constructing the right constant pool?
+  SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, dl, MVT::f80, DAG.getEntryNode(),
+                                 FudgePtr, PseudoSourceValue::getConstantPool(),
+                                 0, MVT::f32, false, false, 4);
+  // Extend everything to 80 bits to force it to be done on x87.
+  SDValue Add = DAG.getNode(ISD::FADD, dl, MVT::f80, Fild, Fudge);
+  return DAG.getNode(ISD::FP_ROUND, dl, DstVT, Add, DAG.getIntPtrConstant(0));
 }
 
 std::pair<SDValue,SDValue> X86TargetLowering::
@@ -6593,221 +6649,6 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
   return DAG.getMergeValues(Ops1, 2, dl);
 }
 
-SDValue
-X86TargetLowering::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
-                                           SDValue Chain,
-                                           SDValue Dst, SDValue Src,
-                                           SDValue Size, unsigned Align,
-                                           bool isVolatile,
-                                           const Value *DstSV,
-                                           uint64_t DstSVOff) const {
-  ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
-
-  // If not DWORD aligned or size is more than the threshold, call the library.
-  // The libc version is likely to be faster for these cases. It can use the
-  // address value and run time information about the CPU.
-  if ((Align & 3) != 0 ||
-      !ConstantSize ||
-      ConstantSize->getZExtValue() >
-        getSubtarget()->getMaxInlineSizeThreshold()) {
-    SDValue InFlag(0, 0);
-
-    // Check to see if there is a specialized entry-point for memory zeroing.
-    ConstantSDNode *V = dyn_cast<ConstantSDNode>(Src);
-
-    if (const char *bzeroEntry =  V &&
-        V->isNullValue() ? Subtarget->getBZeroEntry() : 0) {
-      EVT IntPtr = getPointerTy();
-      const Type *IntPtrTy = TD->getIntPtrType(*DAG.getContext());
-      TargetLowering::ArgListTy Args;
-      TargetLowering::ArgListEntry Entry;
-      Entry.Node = Dst;
-      Entry.Ty = IntPtrTy;
-      Args.push_back(Entry);
-      Entry.Node = Size;
-      Args.push_back(Entry);
-      std::pair<SDValue,SDValue> CallResult =
-        LowerCallTo(Chain, Type::getVoidTy(*DAG.getContext()),
-                    false, false, false, false,
-                    0, CallingConv::C, false, /*isReturnValueUsed=*/false,
-                    DAG.getExternalSymbol(bzeroEntry, IntPtr), Args, DAG, dl);
-      return CallResult.second;
-    }
-
-    // Otherwise have the target-independent code call memset.
-    return SDValue();
-  }
-
-  uint64_t SizeVal = ConstantSize->getZExtValue();
-  SDValue InFlag(0, 0);
-  EVT AVT;
-  SDValue Count;
-  ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Src);
-  unsigned BytesLeft = 0;
-  bool TwoRepStos = false;
-  if (ValC) {
-    unsigned ValReg;
-    uint64_t Val = ValC->getZExtValue() & 255;
-
-    // If the value is a constant, then we can potentially use larger sets.
-    switch (Align & 3) {
-    case 2:   // WORD aligned
-      AVT = MVT::i16;
-      ValReg = X86::AX;
-      Val = (Val << 8) | Val;
-      break;
-    case 0:  // DWORD aligned
-      AVT = MVT::i32;
-      ValReg = X86::EAX;
-      Val = (Val << 8)  | Val;
-      Val = (Val << 16) | Val;
-      if (Subtarget->is64Bit() && ((Align & 0x7) == 0)) {  // QWORD aligned
-        AVT = MVT::i64;
-        ValReg = X86::RAX;
-        Val = (Val << 32) | Val;
-      }
-      break;
-    default:  // Byte aligned
-      AVT = MVT::i8;
-      ValReg = X86::AL;
-      Count = DAG.getIntPtrConstant(SizeVal);
-      break;
-    }
-
-    if (AVT.bitsGT(MVT::i8)) {
-      unsigned UBytes = AVT.getSizeInBits() / 8;
-      Count = DAG.getIntPtrConstant(SizeVal / UBytes);
-      BytesLeft = SizeVal % UBytes;
-    }
-
-    Chain  = DAG.getCopyToReg(Chain, dl, ValReg, DAG.getConstant(Val, AVT),
-                              InFlag);
-    InFlag = Chain.getValue(1);
-  } else {
-    AVT = MVT::i8;
-    Count  = DAG.getIntPtrConstant(SizeVal);
-    Chain  = DAG.getCopyToReg(Chain, dl, X86::AL, Src, InFlag);
-    InFlag = Chain.getValue(1);
-  }
-
-  Chain  = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RCX :
-                                                              X86::ECX,
-                            Count, InFlag);
-  InFlag = Chain.getValue(1);
-  Chain  = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RDI :
-                                                              X86::EDI,
-                            Dst, InFlag);
-  InFlag = Chain.getValue(1);
-
-  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
-  SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag };
-  Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops, array_lengthof(Ops));
-
-  if (TwoRepStos) {
-    InFlag = Chain.getValue(1);
-    Count  = Size;
-    EVT CVT = Count.getValueType();
-    SDValue Left = DAG.getNode(ISD::AND, dl, CVT, Count,
-                               DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT));
-    Chain  = DAG.getCopyToReg(Chain, dl, (CVT == MVT::i64) ? X86::RCX :
-                                                             X86::ECX,
-                              Left, InFlag);
-    InFlag = Chain.getValue(1);
-    Tys = DAG.getVTList(MVT::Other, MVT::Flag);
-    SDValue Ops[] = { Chain, DAG.getValueType(MVT::i8), InFlag };
-    Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops, array_lengthof(Ops));
-  } else if (BytesLeft) {
-    // Handle the last 1 - 7 bytes.
-    unsigned Offset = SizeVal - BytesLeft;
-    EVT AddrVT = Dst.getValueType();
-    EVT SizeVT = Size.getValueType();
-
-    Chain = DAG.getMemset(Chain, dl,
-                          DAG.getNode(ISD::ADD, dl, AddrVT, Dst,
-                                      DAG.getConstant(Offset, AddrVT)),
-                          Src,
-                          DAG.getConstant(BytesLeft, SizeVT),
-                          Align, isVolatile, DstSV, DstSVOff + Offset);
-  }
-
-  // TODO: Use a Tokenfactor, as in memcpy, instead of a single chain.
-  return Chain;
-}
-
-SDValue
-X86TargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
-                                      SDValue Chain, SDValue Dst, SDValue Src,
-                                      SDValue Size, unsigned Align,
-                                      bool isVolatile, bool AlwaysInline,
-                                      const Value *DstSV,
-                                      uint64_t DstSVOff,
-                                      const Value *SrcSV,
-                                      uint64_t SrcSVOff) const {
-  // This requires the copy size to be a constant, preferrably
-  // within a subtarget-specific limit.
-  ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
-  if (!ConstantSize)
-    return SDValue();
-  uint64_t SizeVal = ConstantSize->getZExtValue();
-  if (!AlwaysInline && SizeVal > getSubtarget()->getMaxInlineSizeThreshold())
-    return SDValue();
-
-  /// If not DWORD aligned, call the library.
-  if ((Align & 3) != 0)
-    return SDValue();
-
-  // DWORD aligned
-  EVT AVT = MVT::i32;
-  if (Subtarget->is64Bit() && ((Align & 0x7) == 0))  // QWORD aligned
-    AVT = MVT::i64;
-
-  unsigned UBytes = AVT.getSizeInBits() / 8;
-  unsigned CountVal = SizeVal / UBytes;
-  SDValue Count = DAG.getIntPtrConstant(CountVal);
-  unsigned BytesLeft = SizeVal % UBytes;
-
-  SDValue InFlag(0, 0);
-  Chain  = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RCX :
-                                                              X86::ECX,
-                            Count, InFlag);
-  InFlag = Chain.getValue(1);
-  Chain  = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RDI :
-                                                              X86::EDI,
-                            Dst, InFlag);
-  InFlag = Chain.getValue(1);
-  Chain  = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RSI :
-                                                              X86::ESI,
-                            Src, InFlag);
-  InFlag = Chain.getValue(1);
-
-  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
-  SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag };
-  SDValue RepMovs = DAG.getNode(X86ISD::REP_MOVS, dl, Tys, Ops,
-                                array_lengthof(Ops));
-
-  SmallVector<SDValue, 4> Results;
-  Results.push_back(RepMovs);
-  if (BytesLeft) {
-    // Handle the last 1 - 7 bytes.
-    unsigned Offset = SizeVal - BytesLeft;
-    EVT DstVT = Dst.getValueType();
-    EVT SrcVT = Src.getValueType();
-    EVT SizeVT = Size.getValueType();
-    Results.push_back(DAG.getMemcpy(Chain, dl,
-                                    DAG.getNode(ISD::ADD, dl, DstVT, Dst,
-                                                DAG.getConstant(Offset, DstVT)),
-                                    DAG.getNode(ISD::ADD, dl, SrcVT, Src,
-                                                DAG.getConstant(Offset, SrcVT)),
-                                    DAG.getConstant(BytesLeft, SizeVT),
-                                    Align, isVolatile, AlwaysInline,
-                                    DstSV, DstSVOff + Offset,
-                                    SrcSV, SrcSVOff + Offset));
-  }
-
-  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
-                     &Results[0], Results.size());
-}
-
 SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
   MachineFunction &MF = DAG.getMachineFunction();
   X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
@@ -7138,6 +6979,9 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const
 
 SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op,
                                            SelectionDAG &DAG) const {
+  MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+  MFI->setReturnAddressIsTaken(true);
+
   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
   DebugLoc dl = Op.getDebugLoc();
 
@@ -7161,6 +7005,7 @@ SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op,
 SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
   MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
   MFI->setFrameAddressIsTaken(true);
+
   EVT VT = Op.getValueType();
   DebugLoc dl = Op.getDebugLoc();  // FIXME probably not meaningful
   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
@@ -7298,6 +7143,7 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op,
       break;
     }
     case CallingConv::X86_FastCall:
+    case CallingConv::X86_ThisCall:
     case CallingConv::Fast:
       // Pass 'nest' parameter in EAX.
       // Must be kept in sync with X86CallingConv.td
@@ -7630,6 +7476,27 @@ SDValue X86TargetLowering::LowerREADCYCLECOUNTER(SDValue Op,
   return DAG.getMergeValues(Ops, 2, dl);
 }
 
+SDValue X86TargetLowering::LowerBIT_CONVERT(SDValue Op,
+                                            SelectionDAG &DAG) const {
+  EVT SrcVT = Op.getOperand(0).getValueType();
+  EVT DstVT = Op.getValueType();
+  assert((Subtarget->is64Bit() && !Subtarget->hasSSE2() && 
+          Subtarget->hasMMX() && !DisableMMX) &&
+         "Unexpected custom BIT_CONVERT");
+  assert((DstVT == MVT::i64 || 
+          (DstVT.isVector() && DstVT.getSizeInBits()==64)) &&
+         "Unexpected custom BIT_CONVERT");
+  // i64 <=> MMX conversions are Legal.
+  if (SrcVT==MVT::i64 && DstVT.isVector())
+    return Op;
+  if (DstVT==MVT::i64 && SrcVT.isVector())
+    return Op;
+  // MMX <=> MMX conversions are Legal.
+  if (SrcVT.isVector() && DstVT.isVector())
+    return Op;
+  // All other conversions need to be expanded.
+  return SDValue();
+}
 SDValue X86TargetLowering::LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) const {
   SDNode *Node = Op.getNode();
   DebugLoc dl = Node->getDebugLoc();
@@ -7699,6 +7566,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::SMULO:
   case ISD::UMULO:              return LowerXALUO(Op, DAG);
   case ISD::READCYCLECOUNTER:   return LowerREADCYCLECOUNTER(Op, DAG);
+  case ISD::BIT_CONVERT:        return LowerBIT_CONVERT(Op, DAG);
   }
 }
 
@@ -8203,9 +8071,15 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr,
   MachineOperand& dest1Oper = bInstr->getOperand(0);
   MachineOperand& dest2Oper = bInstr->getOperand(1);
   MachineOperand* argOpers[2 + X86AddrNumOperands];
-  for (int i=0; i < 2 + X86AddrNumOperands; ++i)
+  for (int i=0; i < 2 + X86AddrNumOperands; ++i) {
     argOpers[i] = &bInstr->getOperand(i+2);
 
+    // We use some of the operands multiple times, so conservatively just
+    // clear any kill flags that might be present.
+    if (argOpers[i]->isReg() && argOpers[i]->isUse())
+      argOpers[i]->setIsKill(false);
+  }
+
   // x86 address has 5 operands: base, index, scale, displacement, and segment.
   int lastAddrIndx = X86AddrNumOperands - 1; // [0,3]
 
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 440601f9..1ef1a7b 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -563,7 +563,7 @@ namespace llvm {
       return !X86ScalarSSEf64 || VT == MVT::f80;
     }
     
-    virtual const X86Subtarget* getSubtarget() const {
+    const X86Subtarget* getSubtarget() const {
       return Subtarget;
     }
 
@@ -677,6 +677,7 @@ namespace llvm {
     SDValue LowerShift(SDValue Op, SelectionDAG &DAG) const;
     SDValue BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot,
                       SelectionDAG &DAG) const;
+    SDValue LowerBIT_CONVERT(SDValue op, SelectionDAG &DAG) const;
     SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) const;
@@ -743,23 +744,6 @@ namespace llvm {
     void ReplaceATOMIC_BINARY_64(SDNode *N, SmallVectorImpl<SDValue> &Results,
                                  SelectionDAG &DAG, unsigned NewOp) const;
 
-    SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
-                                    SDValue Chain,
-                                    SDValue Dst, SDValue Src,
-                                    SDValue Size, unsigned Align,
-                                    bool isVolatile,
-                                    const Value *DstSV,
-                                    uint64_t DstSVOff) const;
-    SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
-                                    SDValue Chain,
-                                    SDValue Dst, SDValue Src,
-                                    SDValue Size, unsigned Align,
-                                    bool isVolatile, bool AlwaysInline,
-                                    const Value *DstSV,
-                                    uint64_t DstSVOff,
-                                    const Value *SrcSV,
-                                    uint64_t SrcSVOff) const;
-    
     /// Utility function to emit string processing sse4.2 instructions
     /// that return in xmm0.
     /// This takes the instruction to expand, the associated machine basic
diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td
index f5c3dbf..97eb17c 100644
--- a/lib/Target/X86/X86Instr64bit.td
+++ b/lib/Target/X86/X86Instr64bit.td
@@ -18,7 +18,9 @@
 //
 
 // 64-bits but only 32 bits are significant.
-def i64i32imm  : Operand<i64>;
+def i64i32imm  : Operand<i64> {
+  let ParserMatchClass = ImmSExti64i32AsmOperand;
+}
 
 // 64-bits but only 32 bits are significant, and those bits are treated as being
 // pc relative.
@@ -30,7 +32,7 @@ def i64i32imm_pcrel : Operand<i64> {
 
 // 64-bits but only 8 bits are significant.
 def i64i8imm   : Operand<i64> {
-  let ParserMatchClass = ImmSExt8AsmOperand;
+  let ParserMatchClass = ImmSExti64i8AsmOperand;
 }
 
 // Special i64mem for addresses of load folding tail calls. These are not
@@ -198,6 +200,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
   def TCRETURNri64 : I<0, Pseudo, (outs), (ins GR64_TC:$dst, i32imm:$offset,
                                            variable_ops),
                        "#TC_RETURN $dst $offset", []>;
+  let mayLoad = 1 in
   def TCRETURNmi64 : I<0, Pseudo, (outs), 
                        (ins i64mem_TC:$dst, i32imm:$offset, variable_ops),
                        "#TC_RETURN $dst $offset", []>;
@@ -208,6 +211,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
   def TAILJMPr64 : I<0xFF, MRM4r, (outs), (ins GR64_TC:$dst, variable_ops),
                      "jmp{q}\t{*}$dst  # TAILCALL", []>;
 
+  let mayLoad = 1 in
   def TAILJMPm64 : I<0xFF, MRM4m, (outs), (ins i64mem_TC:$dst, variable_ops),
                      "jmp{q}\t{*}$dst  # TAILCALL", []>;
 }
@@ -241,6 +245,7 @@ def EH_RETURN64   : I<0xC3, RawFrm, (outs), (ins GR64:$addr),
 
 def POPCNT64rr : RI<0xB8, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
                     "popcnt{q}\t{$src, $dst|$dst, $src}", []>, XS;
+let mayLoad = 1 in
 def POPCNT64rm : RI<0xB8, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
                     "popcnt{q}\t{$src, $dst|$dst, $src}", []>, XS;
 
@@ -267,14 +272,16 @@ def PUSH64i8   : Ii8<0x6a, RawFrm, (outs), (ins i8imm:$imm),
                      "push{q}\t$imm", []>;
 def PUSH64i16  : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm), 
                       "push{q}\t$imm", []>;
-def PUSH64i32  : Ii32<0x68, RawFrm, (outs), (ins i32imm:$imm), 
+def PUSH64i32  : Ii32<0x68, RawFrm, (outs), (ins i64i32imm:$imm),
                       "push{q}\t$imm", []>;
 }
 
-let Defs = [RSP, EFLAGS], Uses = [RSP], mayLoad = 1 in
-def POPFQ    : I<0x9D, RawFrm, (outs), (ins), "popf{q}", []>, REX_W;
-let Defs = [RSP], Uses = [RSP, EFLAGS], mayStore = 1 in
-def PUSHFQ64   : I<0x9C, RawFrm, (outs), (ins), "pushf{q}", []>;
+let Defs = [RSP, EFLAGS], Uses = [RSP], mayLoad = 1, neverHasSideEffects=1 in
+def POPF64   : I<0x9D, RawFrm, (outs), (ins), "popfq", []>,
+               Requires<[In64BitMode]>;
+let Defs = [RSP], Uses = [RSP, EFLAGS], mayStore = 1, neverHasSideEffects=1 in
+def PUSHF64    : I<0x9C, RawFrm, (outs), (ins), "pushfq", []>,
+                 Requires<[In64BitMode]>;
 
 def LEA64_32r : I<0x8D, MRMSrcMem,
                   (outs GR32:$dst), (ins lea64_32mem:$src),
@@ -309,16 +316,22 @@ def BSR64rm  : RI<0xBD, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
 } // Defs = [EFLAGS]
 
 // Repeat string ops
-let Defs = [RCX,RDI,RSI], Uses = [RCX,RDI,RSI] in
+let Defs = [RCX,RDI,RSI], Uses = [RCX,RDI,RSI], isCodeGenOnly = 1 in
 def REP_MOVSQ : RI<0xA5, RawFrm, (outs), (ins), "{rep;movsq|rep movsq}",
                    [(X86rep_movs i64)]>, REP;
-let Defs = [RCX,RDI], Uses = [RAX,RCX,RDI] in
+let Defs = [RCX,RDI], Uses = [RAX,RCX,RDI], isCodeGenOnly = 1 in
 def REP_STOSQ : RI<0xAB, RawFrm, (outs), (ins), "{rep;stosq|rep stosq}",
                    [(X86rep_stos i64)]>, REP;
 
-def SCAS64 : RI<0xAF, RawFrm, (outs), (ins), "scas{q}", []>;
+let Defs = [EDI,ESI], Uses = [EDI,ESI,EFLAGS] in
+def MOVSQ : RI<0xA5, RawFrm, (outs), (ins), "movsq", []>;
+
+let Defs = [RCX,RDI], Uses = [RAX,RCX,RDI,EFLAGS] in
+def STOSQ : RI<0xAB, RawFrm, (outs), (ins), "stosq", []>;
 
-def CMPS64 : RI<0xA7, RawFrm, (outs), (ins), "cmps{q}", []>;
+def SCAS64 : RI<0xAF, RawFrm, (outs), (ins), "scasq", []>;
+
+def CMPS64 : RI<0xA7, RawFrm, (outs), (ins), "cmpsq", []>;
 
 // Fast system-call instructions
 def SYSEXIT64 : RI<0x35, RawFrm,
@@ -341,8 +354,17 @@ def MOV64ri32 : RIi32<0xC7, MRM0r, (outs GR64:$dst), (ins i64i32imm:$src),
                       [(set GR64:$dst, i64immSExt32:$src)]>;
 }
 
+// The assembler accepts movq of a 64-bit immediate as an alternate spelling of
+// movabsq.
+let isAsmParserOnly = 1 in {
+def MOV64ri_alt : RIi64<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64imm:$src),
+                    "mov{q}\t{$src, $dst|$dst, $src}", []>;
+}
+
+let isCodeGenOnly = 1 in {
 def MOV64rr_REV : RI<0x8B, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
                      "mov{q}\t{$src, $dst|$dst, $src}", []>;
+}
 
 let canFoldAsLoad = 1, isReMaterializable = 1 in
 def MOV64rm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
@@ -398,9 +420,9 @@ def MOV64dr : I<0x23, MRMSrcReg, (outs DEBUG_REG:$dst), (ins GR64:$src),
                 "mov{q}\t{$src, $dst|$dst, $src}", []>, TB;
 
 // Moves to and from control registers
-def MOV64rc : I<0x20, MRMDestReg, (outs GR64:$dst), (ins CONTROL_REG_64:$src),
+def MOV64rc : I<0x20, MRMDestReg, (outs GR64:$dst), (ins CONTROL_REG:$src),
                 "mov{q}\t{$src, $dst|$dst, $src}", []>, TB;
-def MOV64cr : I<0x22, MRMSrcReg, (outs CONTROL_REG_64:$dst), (ins GR64:$src),
+def MOV64cr : I<0x22, MRMSrcReg, (outs CONTROL_REG:$dst), (ins GR64:$src),
                 "mov{q}\t{$src, $dst|$dst, $src}", []>, TB;
 
 // Sign/Zero extenders
@@ -478,7 +500,7 @@ def def32 : PatLeaf<(i32 GR32:$src), [{
 // In the case of a 32-bit def that is known to implicitly zero-extend,
 // we can use a SUBREG_TO_REG.
 def : Pat<(i64 (zext def32:$src)),
-          (SUBREG_TO_REG (i64 0), GR32:$src, x86_subreg_32bit)>;
+          (SUBREG_TO_REG (i64 0), GR32:$src, sub_32bit)>;
 
 let neverHasSideEffects = 1 in {
   let Defs = [RAX], Uses = [EAX] in
@@ -496,7 +518,7 @@ let neverHasSideEffects = 1 in {
 
 let Defs = [EFLAGS] in {
 
-def ADD64i32 : RIi32<0x05, RawFrm, (outs), (ins i32imm:$src),
+def ADD64i32 : RIi32<0x05, RawFrm, (outs), (ins i64i32imm:$src),
                      "add{q}\t{$src, %rax|%rax, $src}", []>;
 
 let isTwoAddress = 1 in {
@@ -555,7 +577,7 @@ def ADD64mi32 : RIi32<0x81, MRM0m, (outs), (ins i64mem:$dst, i64i32imm :$src2),
 
 let Uses = [EFLAGS] in {
 
-def ADC64i32 : RIi32<0x15, RawFrm, (outs), (ins i32imm:$src),
+def ADC64i32 : RIi32<0x15, RawFrm, (outs), (ins i64i32imm:$src),
                      "adc{q}\t{$src, %rax|%rax, $src}", []>;
 
 let isTwoAddress = 1 in {
@@ -565,9 +587,11 @@ def ADC64rr  : RI<0x11, MRMDestReg, (outs GR64:$dst),
                   "adc{q}\t{$src2, $dst|$dst, $src2}",
                   [(set GR64:$dst, (adde GR64:$src1, GR64:$src2))]>;
 
+let isCodeGenOnly = 1 in {
 def ADC64rr_REV : RI<0x13, MRMSrcReg , (outs GR32:$dst), 
                      (ins GR64:$src1, GR64:$src2),
                     "adc{q}\t{$src2, $dst|$dst, $src2}", []>;
+}
 
 def ADC64rm  : RI<0x13, MRMSrcMem , (outs GR64:$dst), 
                   (ins GR64:$src1, i64mem:$src2),
@@ -605,9 +629,11 @@ def SUB64rr  : RI<0x29, MRMDestReg, (outs GR64:$dst),
                   [(set GR64:$dst, EFLAGS,
                         (X86sub_flag GR64:$src1, GR64:$src2))]>;
 
+let isCodeGenOnly = 1 in {
 def SUB64rr_REV : RI<0x2B, MRMSrcReg, (outs GR64:$dst), 
                      (ins GR64:$src1, GR64:$src2),
                      "sub{q}\t{$src2, $dst|$dst, $src2}", []>;
+}
 
 // Register-Memory Subtraction
 def SUB64rm  : RI<0x2B, MRMSrcMem, (outs GR64:$dst), 
@@ -629,7 +655,7 @@ def SUB64ri32 : RIi32<0x81, MRM5r, (outs GR64:$dst),
                             (X86sub_flag GR64:$src1, i64immSExt32:$src2))]>;
 } // isTwoAddress
 
-def SUB64i32 : RIi32<0x2D, RawFrm, (outs), (ins i32imm:$src),
+def SUB64i32 : RIi32<0x2D, RawFrm, (outs), (ins i64i32imm:$src),
                      "sub{q}\t{$src, %rax|%rax, $src}", []>;
 
 // Memory-Register Subtraction
@@ -657,9 +683,11 @@ def SBB64rr    : RI<0x19, MRMDestReg, (outs GR64:$dst),
                     "sbb{q}\t{$src2, $dst|$dst, $src2}",
                     [(set GR64:$dst, (sube GR64:$src1, GR64:$src2))]>;
 
+let isCodeGenOnly = 1 in {
 def SBB64rr_REV : RI<0x1B, MRMSrcReg, (outs GR64:$dst), 
                      (ins GR64:$src1, GR64:$src2),
                      "sbb{q}\t{$src2, $dst|$dst, $src2}", []>;
+}
                      
 def SBB64rm  : RI<0x1B, MRMSrcMem, (outs GR64:$dst), 
                   (ins GR64:$src1, i64mem:$src2),
@@ -676,7 +704,7 @@ def SBB64ri32 : RIi32<0x81, MRM3r, (outs GR64:$dst),
                       [(set GR64:$dst, (sube GR64:$src1, i64immSExt32:$src2))]>;
 } // isTwoAddress
 
-def SBB64i32 : RIi32<0x1D, RawFrm, (outs), (ins i32imm:$src),
+def SBB64i32 : RIi32<0x1D, RawFrm, (outs), (ins i64i32imm:$src),
                      "sbb{q}\t{$src, %rax|%rax, $src}", []>;
 
 def SBB64mr  : RI<0x19, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), 
@@ -1076,7 +1104,7 @@ def NOT64m : RI<0xF7, MRM2m, (outs), (ins i64mem:$dst), "not{q}\t$dst",
                 [(store (not (loadi64 addr:$dst)), addr:$dst)]>;
 
 let Defs = [EFLAGS] in {
-def AND64i32 : RIi32<0x25, RawFrm, (outs), (ins i32imm:$src),
+def AND64i32 : RIi32<0x25, RawFrm, (outs), (ins i64i32imm:$src),
                      "and{q}\t{$src, %rax|%rax, $src}", []>;
 
 let isTwoAddress = 1 in {
@@ -1086,9 +1114,11 @@ def AND64rr  : RI<0x21, MRMDestReg,
                   "and{q}\t{$src2, $dst|$dst, $src2}",
                   [(set GR64:$dst, EFLAGS,
                         (X86and_flag GR64:$src1, GR64:$src2))]>;
+let isCodeGenOnly = 1 in {
 def AND64rr_REV : RI<0x23, MRMSrcReg, (outs GR64:$dst), 
                      (ins GR64:$src1, GR64:$src2),
                      "and{q}\t{$src2, $dst|$dst, $src2}", []>;
+}
 def AND64rm  : RI<0x23, MRMSrcMem,
                   (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
                   "and{q}\t{$src2, $dst|$dst, $src2}",
@@ -1129,9 +1159,11 @@ def OR64rr   : RI<0x09, MRMDestReg, (outs GR64:$dst),
                   "or{q}\t{$src2, $dst|$dst, $src2}",
                   [(set GR64:$dst, EFLAGS,
                         (X86or_flag GR64:$src1, GR64:$src2))]>;
+let isCodeGenOnly = 1 in {
 def OR64rr_REV : RI<0x0B, MRMSrcReg, (outs GR64:$dst), 
                     (ins GR64:$src1, GR64:$src2),
                     "or{q}\t{$src2, $dst|$dst, $src2}", []>;
+}
 def OR64rm   : RI<0x0B, MRMSrcMem , (outs GR64:$dst),
                   (ins GR64:$src1, i64mem:$src2),
                   "or{q}\t{$src2, $dst|$dst, $src2}",
@@ -1162,7 +1194,7 @@ def OR64mi32 : RIi32<0x81, MRM1m, (outs), (ins i64mem:$dst, i64i32imm:$src),
               [(store (or (loadi64 addr:$dst), i64immSExt32:$src), addr:$dst),
                (implicit EFLAGS)]>;
 
-def OR64i32 : RIi32<0x0D, RawFrm, (outs), (ins i32imm:$src),
+def OR64i32 : RIi32<0x0D, RawFrm, (outs), (ins i64i32imm:$src),
                     "or{q}\t{$src, %rax|%rax, $src}", []>;
 
 let isTwoAddress = 1 in {
@@ -1172,9 +1204,11 @@ def XOR64rr  : RI<0x31, MRMDestReg,  (outs GR64:$dst),
                   "xor{q}\t{$src2, $dst|$dst, $src2}",
                   [(set GR64:$dst, EFLAGS,
                         (X86xor_flag GR64:$src1, GR64:$src2))]>;
+let isCodeGenOnly = 1 in {
 def XOR64rr_REV : RI<0x33, MRMSrcReg, (outs GR64:$dst), 
                      (ins GR64:$src1, GR64:$src2),
                     "xor{q}\t{$src2, $dst|$dst, $src2}", []>;
+}
 def XOR64rm  : RI<0x33, MRMSrcMem, (outs GR64:$dst), 
                   (ins GR64:$src1, i64mem:$src2), 
                   "xor{q}\t{$src2, $dst|$dst, $src2}",
@@ -1205,7 +1239,7 @@ def XOR64mi32 : RIi32<0x81, MRM6m, (outs), (ins i64mem:$dst, i64i32imm:$src),
              [(store (xor (loadi64 addr:$dst), i64immSExt32:$src), addr:$dst),
               (implicit EFLAGS)]>;
               
-def XOR64i32 : RIi32<0x35, RawFrm, (outs), (ins i32imm:$src),
+def XOR64i32 : RIi32<0x35, RawFrm, (outs), (ins i64i32imm:$src),
                      "xor{q}\t{$src, %rax|%rax, $src}", []>;
 
 } // Defs = [EFLAGS]
@@ -1216,7 +1250,7 @@ def XOR64i32 : RIi32<0x35, RawFrm, (outs), (ins i32imm:$src),
 
 // Integer comparison
 let Defs = [EFLAGS] in {
-def TEST64i32 : RIi32<0xa9, RawFrm, (outs), (ins i32imm:$src),
+def TEST64i32 : RIi32<0xa9, RawFrm, (outs), (ins i64i32imm:$src),
                       "test{q}\t{$src, %rax|%rax, $src}", []>;
 let isCommutable = 1 in
 def TEST64rr : RI<0x85, MRMSrcReg, (outs), (ins GR64:$src1, GR64:$src2),
@@ -1238,7 +1272,7 @@ def TEST64mi32 : RIi32<0xF7, MRM0m, (outs),
                                            i64immSExt32:$src2), 0))]>;
 
 
-def CMP64i32 : RIi32<0x3D, RawFrm, (outs), (ins i32imm:$src),
+def CMP64i32 : RIi32<0x3D, RawFrm, (outs), (ins i64i32imm:$src),
                      "cmp{q}\t{$src, %rax|%rax, $src}", []>;
 def CMP64rr : RI<0x39, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
                  "cmp{q}\t{$src2, $src1|$src1, $src2}",
@@ -1293,7 +1327,8 @@ def BT64mr : RI<0xA3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
 
 def BT64ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR64:$src1, i64i8imm:$src2),
                 "bt{q}\t{$src2, $src1|$src1, $src2}",
-                [(set EFLAGS, (X86bt GR64:$src1, i64immSExt8:$src2))]>, TB;
+                [(set EFLAGS, (X86bt GR64:$src1, i64immSExt8:$src2))]>, TB,
+		REX_W;
 // Note that these instructions don't need FastBTMem because that
 // only applies when the other operand is in a register. When it's
 // an immediate, bt is still fast.
@@ -1714,11 +1749,13 @@ def XCHG64rr : RI<0x87, MRMSrcReg, (outs GR64:$dst), (ins GR64:$val,GR64:$src),
 
 def XADD64rr  : RI<0xC1, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
                    "xadd{q}\t{$src, $dst|$dst, $src}", []>, TB;
+let mayLoad = 1, mayStore = 1 in
 def XADD64rm  : RI<0xC1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
                    "xadd{q}\t{$src, $dst|$dst, $src}", []>, TB;
                    
 def CMPXCHG64rr  : RI<0xB1, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
                       "cmpxchg{q}\t{$src, $dst|$dst, $src}", []>, TB;
+let mayLoad = 1, mayStore = 1 in
 def CMPXCHG64rm  : RI<0xB1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
                       "cmpxchg{q}\t{$src, $dst|$dst, $src}", []>, TB;
                       
@@ -1730,7 +1767,7 @@ def XCHG64ar : RI<0x90, AddRegFrm, (outs), (ins GR64:$src),
                   "xchg{q}\t{$src, %rax|%rax, $src}", []>;
 
 // Optimized codegen when the non-memory output is not used.
-let Defs = [EFLAGS] in {
+let Defs = [EFLAGS], mayLoad = 1, mayStore = 1 in {
 // FIXME: Use normal add / sub instructions and add lock prefix dynamically.
 def LOCK_ADD64mr : RI<0x03, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
                       "lock\n\t"
@@ -1982,14 +2019,14 @@ def : Pat<(extloadi64i16 addr:$src), (MOVZX64rm16 addr:$src)>;
 // defined after an extload.
 def : Pat<(extloadi64i32 addr:$src),
           (SUBREG_TO_REG (i64 0), (MOV32rm addr:$src),
-                         x86_subreg_32bit)>;
+                         sub_32bit)>;
 
 // anyext. Define these to do an explicit zero-extend to
 // avoid partial-register updates.
 def : Pat<(i64 (anyext GR8 :$src)), (MOVZX64rr8  GR8  :$src)>;
 def : Pat<(i64 (anyext GR16:$src)), (MOVZX64rr16 GR16 :$src)>;
 def : Pat<(i64 (anyext GR32:$src)),
-          (SUBREG_TO_REG (i64 0), GR32:$src, x86_subreg_32bit)>;
+          (SUBREG_TO_REG (i64 0), GR32:$src, sub_32bit)>;
 
 //===----------------------------------------------------------------------===//
 // Some peepholes
@@ -2016,54 +2053,54 @@ def : Pat<(and GR64:$src, i64immZExt32:$imm),
           (SUBREG_TO_REG
             (i64 0),
             (AND32ri
-              (EXTRACT_SUBREG GR64:$src, x86_subreg_32bit),
+              (EXTRACT_SUBREG GR64:$src, sub_32bit),
               (i32 (GetLo32XForm imm:$imm))),
-            x86_subreg_32bit)>;
+            sub_32bit)>;
 
 // r & (2^32-1) ==> movz
 def : Pat<(and GR64:$src, 0x00000000FFFFFFFF),
-          (MOVZX64rr32 (EXTRACT_SUBREG GR64:$src, x86_subreg_32bit))>;
+          (MOVZX64rr32 (EXTRACT_SUBREG GR64:$src, sub_32bit))>;
 // r & (2^16-1) ==> movz
 def : Pat<(and GR64:$src, 0xffff),
-          (MOVZX64rr16 (i16 (EXTRACT_SUBREG GR64:$src, x86_subreg_16bit)))>;
+          (MOVZX64rr16 (i16 (EXTRACT_SUBREG GR64:$src, sub_16bit)))>;
 // r & (2^8-1) ==> movz
 def : Pat<(and GR64:$src, 0xff),
-          (MOVZX64rr8 (i8 (EXTRACT_SUBREG GR64:$src, x86_subreg_8bit)))>;
+          (MOVZX64rr8 (i8 (EXTRACT_SUBREG GR64:$src, sub_8bit)))>;
 // r & (2^8-1) ==> movz
 def : Pat<(and GR32:$src1, 0xff),
-           (MOVZX32rr8 (EXTRACT_SUBREG GR32:$src1, x86_subreg_8bit))>,
+           (MOVZX32rr8 (EXTRACT_SUBREG GR32:$src1, sub_8bit))>,
       Requires<[In64BitMode]>;
 // r & (2^8-1) ==> movz
 def : Pat<(and GR16:$src1, 0xff),
-           (MOVZX16rr8 (i8 (EXTRACT_SUBREG GR16:$src1, x86_subreg_8bit)))>,
+           (MOVZX16rr8 (i8 (EXTRACT_SUBREG GR16:$src1, sub_8bit)))>,
       Requires<[In64BitMode]>;
 
 // sext_inreg patterns
 def : Pat<(sext_inreg GR64:$src, i32),
-          (MOVSX64rr32 (EXTRACT_SUBREG GR64:$src, x86_subreg_32bit))>;
+          (MOVSX64rr32 (EXTRACT_SUBREG GR64:$src, sub_32bit))>;
 def : Pat<(sext_inreg GR64:$src, i16),
-          (MOVSX64rr16 (EXTRACT_SUBREG GR64:$src, x86_subreg_16bit))>;
+          (MOVSX64rr16 (EXTRACT_SUBREG GR64:$src, sub_16bit))>;
 def : Pat<(sext_inreg GR64:$src, i8),
-          (MOVSX64rr8 (EXTRACT_SUBREG GR64:$src, x86_subreg_8bit))>;
+          (MOVSX64rr8 (EXTRACT_SUBREG GR64:$src, sub_8bit))>;
 def : Pat<(sext_inreg GR32:$src, i8),
-          (MOVSX32rr8 (EXTRACT_SUBREG GR32:$src, x86_subreg_8bit))>,
+          (MOVSX32rr8 (EXTRACT_SUBREG GR32:$src, sub_8bit))>,
       Requires<[In64BitMode]>;
 def : Pat<(sext_inreg GR16:$src, i8),
-          (MOVSX16rr8 (i8 (EXTRACT_SUBREG GR16:$src, x86_subreg_8bit)))>,
+          (MOVSX16rr8 (i8 (EXTRACT_SUBREG GR16:$src, sub_8bit)))>,
       Requires<[In64BitMode]>;
 
 // trunc patterns
 def : Pat<(i32 (trunc GR64:$src)),
-          (EXTRACT_SUBREG GR64:$src, x86_subreg_32bit)>;
+          (EXTRACT_SUBREG GR64:$src, sub_32bit)>;
 def : Pat<(i16 (trunc GR64:$src)),
-          (EXTRACT_SUBREG GR64:$src, x86_subreg_16bit)>;
+          (EXTRACT_SUBREG GR64:$src, sub_16bit)>;
 def : Pat<(i8 (trunc GR64:$src)),
-          (EXTRACT_SUBREG GR64:$src, x86_subreg_8bit)>;
+          (EXTRACT_SUBREG GR64:$src, sub_8bit)>;
 def : Pat<(i8 (trunc GR32:$src)),
-          (EXTRACT_SUBREG GR32:$src, x86_subreg_8bit)>,
+          (EXTRACT_SUBREG GR32:$src, sub_8bit)>,
       Requires<[In64BitMode]>;
 def : Pat<(i8 (trunc GR16:$src)),
-          (EXTRACT_SUBREG GR16:$src, x86_subreg_8bit)>,
+          (EXTRACT_SUBREG GR16:$src, sub_8bit)>,
       Requires<[In64BitMode]>;
 
 // h-register tricks.
@@ -2079,67 +2116,67 @@ def : Pat<(and (srl_su GR64:$src, (i8 8)), (i64 255)),
             (i64 0),
             (MOVZX32_NOREXrr8
               (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS GR64:$src, GR64_ABCD)),
-                              x86_subreg_8bit_hi)),
-            x86_subreg_32bit)>;
+                              sub_8bit_hi)),
+            sub_32bit)>;
 def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)),
           (MOVZX32_NOREXrr8
             (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
-                            x86_subreg_8bit_hi))>,
+                            sub_8bit_hi))>,
       Requires<[In64BitMode]>;
 def : Pat<(srl (and_su GR32:$src, 0xff00), (i8 8)),
           (MOVZX32_NOREXrr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, 
                                                                    GR32_ABCD)),
-                                             x86_subreg_8bit_hi))>,
+                                             sub_8bit_hi))>,
       Requires<[In64BitMode]>;
 def : Pat<(srl GR16:$src, (i8 8)),
           (EXTRACT_SUBREG
             (MOVZX32_NOREXrr8
               (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
-                              x86_subreg_8bit_hi)),
-            x86_subreg_16bit)>,
+                              sub_8bit_hi)),
+            sub_16bit)>,
       Requires<[In64BitMode]>;
 def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))),
           (MOVZX32_NOREXrr8
             (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
-                            x86_subreg_8bit_hi))>,
+                            sub_8bit_hi))>,
       Requires<[In64BitMode]>;
 def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))),
           (MOVZX32_NOREXrr8
             (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
-                            x86_subreg_8bit_hi))>,
+                            sub_8bit_hi))>,
       Requires<[In64BitMode]>;
 def : Pat<(i64 (zext (srl_su GR16:$src, (i8 8)))),
           (SUBREG_TO_REG
             (i64 0),
             (MOVZX32_NOREXrr8
               (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
-                              x86_subreg_8bit_hi)),
-            x86_subreg_32bit)>;
+                              sub_8bit_hi)),
+            sub_32bit)>;
 def : Pat<(i64 (anyext (srl_su GR16:$src, (i8 8)))),
           (SUBREG_TO_REG
             (i64 0),
             (MOVZX32_NOREXrr8
               (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
-                              x86_subreg_8bit_hi)),
-            x86_subreg_32bit)>;
+                              sub_8bit_hi)),
+            sub_32bit)>;
 
 // h-register extract and store.
 def : Pat<(store (i8 (trunc_su (srl_su GR64:$src, (i8 8)))), addr:$dst),
           (MOV8mr_NOREX
             addr:$dst,
             (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS GR64:$src, GR64_ABCD)),
-                            x86_subreg_8bit_hi))>;
+                            sub_8bit_hi))>;
 def : Pat<(store (i8 (trunc_su (srl_su GR32:$src, (i8 8)))), addr:$dst),
           (MOV8mr_NOREX
             addr:$dst,
             (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
-                            x86_subreg_8bit_hi))>,
+                            sub_8bit_hi))>,
       Requires<[In64BitMode]>;
 def : Pat<(store (i8 (trunc_su (srl_su GR16:$src, (i8 8)))), addr:$dst),
           (MOV8mr_NOREX
             addr:$dst,
             (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
-                            x86_subreg_8bit_hi))>,
+                            sub_8bit_hi))>,
       Requires<[In64BitMode]>;
 
 // (shl x, 1) ==> (add x, x)
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index a21bfb9..34e12ca 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -744,17 +744,17 @@ X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
     case X86::MOVZX32rr8:
     case X86::MOVSX64rr8:
     case X86::MOVZX64rr8:
-      SubIdx = 1;
+      SubIdx = X86::sub_8bit;
       break;
     case X86::MOVSX32rr16:
     case X86::MOVZX32rr16:
     case X86::MOVSX64rr16:
     case X86::MOVZX64rr16:
-      SubIdx = 3;
+      SubIdx = X86::sub_16bit;
       break;
     case X86::MOVSX64rr32:
     case X86::MOVZX64rr32:
-      SubIdx = 4;
+      SubIdx = X86::sub_32bit;
       break;
     }
     return true;
@@ -1065,7 +1065,7 @@ void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB,
                                  unsigned DestReg, unsigned SubIdx,
                                  const MachineInstr *Orig,
                                  const TargetRegisterInfo *TRI) const {
-  DebugLoc DL = MBB.findDebugLoc(I);
+  DebugLoc DL = Orig->getDebugLoc();
 
   if (SubIdx && TargetRegisterInfo::isPhysicalRegister(DestReg)) {
     DestReg = TRI->getSubReg(DestReg, SubIdx);
@@ -1154,7 +1154,7 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
     BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::INSERT_SUBREG),leaInReg)
     .addReg(leaInReg)
     .addReg(Src, getKillRegState(isKill))
-    .addImm(X86::SUBREG_16BIT);
+    .addImm(X86::sub_16bit);
 
   MachineInstrBuilder MIB = BuildMI(*MFI, MBBI, MI->getDebugLoc(),
                                     get(Opc), leaOutReg);
@@ -1198,7 +1198,7 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
         BuildMI(*MFI, MIB, MI->getDebugLoc(), get(X86::INSERT_SUBREG),leaInReg2)
         .addReg(leaInReg2)
         .addReg(Src2, getKillRegState(isKill2))
-        .addImm(X86::SUBREG_16BIT);
+        .addImm(X86::sub_16bit);
       addRegReg(MIB, leaInReg, true, leaInReg2, true);
     }
     if (LV && isKill2 && InsMI2)
@@ -1212,7 +1212,7 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
     BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::EXTRACT_SUBREG))
     .addReg(Dest, RegState::Define | getDeadRegState(isDead))
     .addReg(leaOutReg, RegState::Kill)
-    .addImm(X86::SUBREG_16BIT);
+    .addImm(X86::sub_16bit);
 
   if (LV) {
     // Update live variables
@@ -1901,8 +1901,8 @@ bool X86InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator MI,
                                 unsigned DestReg, unsigned SrcReg,
                                 const TargetRegisterClass *DestRC,
-                                const TargetRegisterClass *SrcRC) const {
-  DebugLoc DL = MBB.findDebugLoc(MI);
+                                const TargetRegisterClass *SrcRC,
+                                DebugLoc DL) const {
 
   // Determine if DstRC and SrcRC have a common superclass in common.
   const TargetRegisterClass *CommonRC = DestRC;
@@ -1993,12 +1993,12 @@ bool X86InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
     if (SrcReg != X86::EFLAGS)
       return false;
     if (DestRC == &X86::GR64RegClass || DestRC == &X86::GR64_NOSPRegClass) {
-      BuildMI(MBB, MI, DL, get(X86::PUSHFQ64));
+      BuildMI(MBB, MI, DL, get(X86::PUSHF64));
       BuildMI(MBB, MI, DL, get(X86::POP64r), DestReg);
       return true;
     } else if (DestRC == &X86::GR32RegClass ||
                DestRC == &X86::GR32_NOSPRegClass) {
-      BuildMI(MBB, MI, DL, get(X86::PUSHFD));
+      BuildMI(MBB, MI, DL, get(X86::PUSHF32));
       BuildMI(MBB, MI, DL, get(X86::POP32r), DestReg);
       return true;
     }
@@ -2007,12 +2007,12 @@ bool X86InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
       return false;
     if (SrcRC == &X86::GR64RegClass || DestRC == &X86::GR64_NOSPRegClass) {
       BuildMI(MBB, MI, DL, get(X86::PUSH64r)).addReg(SrcReg);
-      BuildMI(MBB, MI, DL, get(X86::POPFQ));
+      BuildMI(MBB, MI, DL, get(X86::POPF64));
       return true;
     } else if (SrcRC == &X86::GR32RegClass ||
                DestRC == &X86::GR32_NOSPRegClass) {
       BuildMI(MBB, MI, DL, get(X86::PUSH32r)).addReg(SrcReg);
-      BuildMI(MBB, MI, DL, get(X86::POPFD));
+      BuildMI(MBB, MI, DL, get(X86::POPF32));
       return true;
     }
   }
@@ -2133,7 +2133,8 @@ static unsigned getStoreRegOpcode(unsigned SrcReg,
 void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator MI,
                                        unsigned SrcReg, bool isKill, int FrameIdx,
-                                       const TargetRegisterClass *RC) const {
+                                       const TargetRegisterClass *RC,
+                                       const TargetRegisterInfo *TRI) const {
   const MachineFunction &MF = *MBB.getParent();
   bool isAligned = (RI.getStackAlignment() >= 16) || RI.canRealignStack(MF);
   unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM);
@@ -2230,7 +2231,8 @@ static unsigned getLoadRegOpcode(unsigned DestReg,
 void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
                                         MachineBasicBlock::iterator MI,
                                         unsigned DestReg, int FrameIdx,
-                                        const TargetRegisterClass *RC) const{
+                                        const TargetRegisterClass *RC,
+                                        const TargetRegisterInfo *TRI) const {
   const MachineFunction &MF = *MBB.getParent();
   bool isAligned = (RI.getStackAlignment() >= 16) || RI.canRealignStack(MF);
   unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM);
@@ -2256,7 +2258,8 @@ void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
 
 bool X86InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
                                              MachineBasicBlock::iterator MI,
-                                const std::vector<CalleeSavedInfo> &CSI) const {
+                                        const std::vector<CalleeSavedInfo> &CSI,
+                                          const TargetRegisterInfo *TRI) const {
   if (CSI.empty())
     return false;
 
@@ -2284,7 +2287,8 @@ bool X86InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
       CalleeFrameSize += SlotSize;
       BuildMI(MBB, MI, DL, get(Opc)).addReg(Reg, RegState::Kill);
     } else {
-      storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(), RegClass);
+      storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(), RegClass,
+                          &RI);
     }
   }
 
@@ -2294,7 +2298,8 @@ bool X86InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
 
 bool X86InstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
                                                MachineBasicBlock::iterator MI,
-                                const std::vector<CalleeSavedInfo> &CSI) const {
+                                        const std::vector<CalleeSavedInfo> &CSI,
+                                          const TargetRegisterInfo *TRI) const {
   if (CSI.empty())
     return false;
 
@@ -2314,7 +2319,7 @@ bool X86InstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
     if (RegClass != &X86::VR128RegClass && !isWin64) {
       BuildMI(MBB, MI, DL, get(Opc), Reg);
     } else {
-      loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RegClass);
+      loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RegClass, &RI);
     }
   }
   return true;
@@ -2478,9 +2483,9 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
         unsigned DstReg = NewMI->getOperand(0).getReg();
         if (TargetRegisterInfo::isPhysicalRegister(DstReg))
           NewMI->getOperand(0).setReg(RI.getSubReg(DstReg,
-                                                   4/*x86_subreg_32bit*/));
+                                                   X86::sub_32bit));
         else
-          NewMI->getOperand(0).setSubReg(4/*x86_subreg_32bit*/);
+          NewMI->getOperand(0).setSubReg(X86::sub_32bit);
       }
       return NewMI;
     }
@@ -2526,9 +2531,9 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
     switch (MI->getOpcode()) {
     default: return NULL;
     case X86::TEST8rr:  NewOpc = X86::CMP8ri; RCSize = 1; break;
-    case X86::TEST16rr: NewOpc = X86::CMP16ri; RCSize = 2; break;
-    case X86::TEST32rr: NewOpc = X86::CMP32ri; RCSize = 4; break;
-    case X86::TEST64rr: NewOpc = X86::CMP64ri32; RCSize = 8; break;
+    case X86::TEST16rr: NewOpc = X86::CMP16ri8; RCSize = 2; break;
+    case X86::TEST32rr: NewOpc = X86::CMP32ri8; RCSize = 4; break;
+    case X86::TEST64rr: NewOpc = X86::CMP64ri8; RCSize = 8; break;
     }
     // Check if it's safe to fold the load. If the size of the object is
     // narrower than the load width, then it's not.
@@ -2595,9 +2600,9 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
     switch (MI->getOpcode()) {
     default: return NULL;
     case X86::TEST8rr:  NewOpc = X86::CMP8ri; break;
-    case X86::TEST16rr: NewOpc = X86::CMP16ri; break;
-    case X86::TEST32rr: NewOpc = X86::CMP32ri; break;
-    case X86::TEST64rr: NewOpc = X86::CMP64ri32; break;
+    case X86::TEST16rr: NewOpc = X86::CMP16ri8; break;
+    case X86::TEST32rr: NewOpc = X86::CMP32ri8; break;
+    case X86::TEST64rr: NewOpc = X86::CMP64ri8; break;
     }
     // Change to CMPXXri r, 0 first.
     MI->setDesc(get(NewOpc));
@@ -2805,16 +2810,22 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
   switch (DataMI->getOpcode()) {
   default: break;
   case X86::CMP64ri32:
+  case X86::CMP64ri8:
   case X86::CMP32ri:
+  case X86::CMP32ri8:
   case X86::CMP16ri:
+  case X86::CMP16ri8:
   case X86::CMP8ri: {
     MachineOperand &MO0 = DataMI->getOperand(0);
     MachineOperand &MO1 = DataMI->getOperand(1);
     if (MO1.getImm() == 0) {
       switch (DataMI->getOpcode()) {
       default: break;
+      case X86::CMP64ri8:
       case X86::CMP64ri32: NewOpc = X86::TEST64rr; break;
+      case X86::CMP32ri8:
       case X86::CMP32ri:   NewOpc = X86::TEST32rr; break;
+      case X86::CMP16ri8:
       case X86::CMP16ri:   NewOpc = X86::TEST16rr; break;
       case X86::CMP8ri:    NewOpc = X86::TEST8rr; break;
       }
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index df99c7f..62d7c74 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -590,11 +590,13 @@ public:
                             MachineBasicBlock::iterator MI,
                             unsigned DestReg, unsigned SrcReg,
                             const TargetRegisterClass *DestRC,
-                            const TargetRegisterClass *SrcRC) const;
+                            const TargetRegisterClass *SrcRC,
+                            DebugLoc DL) const;
   virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MI,
                                    unsigned SrcReg, bool isKill, int FrameIndex,
-                                   const TargetRegisterClass *RC) const;
+                                   const TargetRegisterClass *RC,
+                                   const TargetRegisterInfo *TRI) const;
 
   virtual void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill,
                               SmallVectorImpl<MachineOperand> &Addr,
@@ -606,7 +608,8 @@ public:
   virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MI,
                                     unsigned DestReg, int FrameIndex,
-                                    const TargetRegisterClass *RC) const;
+                                    const TargetRegisterClass *RC,
+                                    const TargetRegisterInfo *TRI) const;
 
   virtual void loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
                                SmallVectorImpl<MachineOperand> &Addr,
@@ -617,11 +620,13 @@ public:
   
   virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator MI,
-                                 const std::vector<CalleeSavedInfo> &CSI) const;
+                                        const std::vector<CalleeSavedInfo> &CSI,
+                                         const TargetRegisterInfo *TRI) const;
 
   virtual bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
                                            MachineBasicBlock::iterator MI,
-                                 const std::vector<CalleeSavedInfo> &CSI) const;
+                                        const std::vector<CalleeSavedInfo> &CSI,
+                                           const TargetRegisterInfo *TRI) const;
   
   virtual
   MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF,
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index a2754ea..0d59c42 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -195,15 +195,15 @@ def ptr_rc_nosp : PointerLikeRegClass<1>;
 //
 def X86MemAsmOperand : AsmOperandClass {
   let Name = "Mem";
-  let SuperClass = ?;
-}
-def X86AbsMemAsmOperand : AsmOperandClass {
-  let Name = "AbsMem";
-  let SuperClass = X86MemAsmOperand;
+  let SuperClasses = [];
 }
 def X86NoSegMemAsmOperand : AsmOperandClass {
   let Name = "NoSegMem";
-  let SuperClass = X86MemAsmOperand;
+  let SuperClasses = [X86MemAsmOperand];
+}
+def X86AbsMemAsmOperand : AsmOperandClass {
+  let Name = "AbsMem";
+  let SuperClasses = [X86NoSegMemAsmOperand];
 }
 class X86MemOperand<string printMethod> : Operand<iPTR> {
   let PrintMethod = printMethod;
@@ -270,19 +270,49 @@ def SSECC : Operand<i8> {
   let PrintMethod = "printSSECC";
 }
 
-def ImmSExt8AsmOperand : AsmOperandClass {
-  let Name = "ImmSExt8";
-  let SuperClass = ImmAsmOperand;
+class ImmSExtAsmOperandClass : AsmOperandClass {
+  let SuperClasses = [ImmAsmOperand];
+  let RenderMethod = "addImmOperands";
+}
+
+// Sign-extended immediate classes. We don't need to define the full lattice
+// here because there is no instruction with an ambiguity between ImmSExti64i32
+// and ImmSExti32i8.
+//
+// The strange ranges come from the fact that the assembler always works with
+// 64-bit immediates, but for a 16-bit target value we want to accept both "-1"
+// (which will be a -1ULL), and "0xFF" (-1 in 16-bits).
+
+// [0, 0x7FFFFFFF]                                            | [0xFFFFFFFF80000000, 0xFFFFFFFFFFFFFFFF]
+def ImmSExti64i32AsmOperand : ImmSExtAsmOperandClass {
+  let Name = "ImmSExti64i32";
+}
+
+// [0, 0x0000007F] | [0x000000000000FF80, 0x000000000000FFFF] | [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF]
+def ImmSExti16i8AsmOperand : ImmSExtAsmOperandClass {
+  let Name = "ImmSExti16i8";
+  let SuperClasses = [ImmSExti64i32AsmOperand];
+}
+
+// [0, 0x0000007F] | [0x00000000FFFFFF80, 0x00000000FFFFFFFF] | [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF]
+def ImmSExti32i8AsmOperand : ImmSExtAsmOperandClass {
+  let Name = "ImmSExti32i8";
+}
+
+// [0, 0x0000007F]                                            | [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF]
+def ImmSExti64i8AsmOperand : ImmSExtAsmOperandClass {
+  let Name = "ImmSExti64i8";
+  let SuperClasses = [ImmSExti16i8AsmOperand, ImmSExti32i8AsmOperand, ImmSExti64i32AsmOperand];
 }
 
 // A couple of more descriptive operand definitions.
 // 16-bits but only 8 bits are significant.
 def i16i8imm  : Operand<i16> {
-  let ParserMatchClass = ImmSExt8AsmOperand;
+  let ParserMatchClass = ImmSExti16i8AsmOperand;
 }
 // 32-bits but only 8 bits are significant.
 def i32i8imm  : Operand<i32> {
-  let ParserMatchClass = ImmSExt8AsmOperand;
+  let ParserMatchClass = ImmSExti32i8AsmOperand;
 }
 
 //===----------------------------------------------------------------------===//
@@ -542,8 +572,10 @@ let neverHasSideEffects = 1 in {
 }
 
 // Trap
-def INT3 : I<0xcc, RawFrm, (outs), (ins), "int\t3", []>;
-def INT : I<0xcd, RawFrm, (outs), (ins i8imm:$trap), "int\t$trap", []>;
+def INTO : I<0xce, RawFrm, (outs), (ins), "into", []>;
+def INT3 : I<0xcc, RawFrm, (outs), (ins), "int3", []>;
+// FIXME: need to make sure that "int $3" matches int3
+def INT : Ii8<0xcd, RawFrm, (outs), (ins i8imm:$trap), "int\t$trap", []>;
 def IRET16 : I<0xcf, RawFrm, (outs), (ins), "iret{w}", []>, OpSize;
 def IRET32 : I<0xcf, RawFrm, (outs), (ins), "iret{l}", []>;
 
@@ -693,6 +725,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
   def TCRETURNri : I<0, Pseudo, (outs), 
                      (ins GR32_TC:$dst, i32imm:$offset, variable_ops),
                      "#TC_RETURN $dst $offset", []>;
+  let mayLoad = 1 in
   def TCRETURNmi : I<0, Pseudo, (outs), 
                      (ins i32mem_TC:$dst, i32imm:$offset, variable_ops),
                      "#TC_RETURN $dst $offset", []>;
@@ -706,8 +739,16 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
   def TAILJMPr : I<0xFF, MRM4r, (outs), (ins GR32_TC:$dst, variable_ops), 
                    "jmp{l}\t{*}$dst  # TAILCALL",
                  []>;     
+  let mayLoad = 1 in
   def TAILJMPm : I<0xFF, MRM4m, (outs), (ins i32mem_TC:$dst, variable_ops),
                    "jmp{l}\t{*}$dst  # TAILCALL", []>;
+
+  // FIXME: This is a hack so that MCInst lowering can preserve the TAILCALL
+  // marker on instructions, while still being able to relax.
+  let isCodeGenOnly = 1 in {
+    def TAILJMP_1 : Ii8PCRel<0xEB, RawFrm, (outs), (ins brtarget8:$dst),
+                         "jmp\t$dst  # TAILCALL", []>;
+  }
 }
 
 //===----------------------------------------------------------------------===//
@@ -719,10 +760,12 @@ def LEAVE    : I<0xC9, RawFrm,
 
 def POPCNT16rr : I<0xB8, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
                    "popcnt{w}\t{$src, $dst|$dst, $src}", []>, OpSize, XS;
+let mayLoad = 1 in
 def POPCNT16rm : I<0xB8, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
                    "popcnt{w}\t{$src, $dst|$dst, $src}", []>, OpSize, XS;
 def POPCNT32rr : I<0xB8, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
                    "popcnt{l}\t{$src, $dst|$dst, $src}", []>, XS;
+let mayLoad = 1 in
 def POPCNT32rm : I<0xB8, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
                    "popcnt{l}\t{$src, $dst|$dst, $src}", []>, XS;
 
@@ -762,12 +805,14 @@ def PUSHi32  : Ii32<0x68, RawFrm, (outs), (ins i32imm:$imm),
 }
 
 let Defs = [ESP, EFLAGS], Uses = [ESP], mayLoad = 1, neverHasSideEffects=1 in {
-def POPF     : I<0x9D, RawFrm, (outs), (ins), "popf{w}", []>, OpSize;
-def POPFD    : I<0x9D, RawFrm, (outs), (ins), "popf{l}", []>;
+def POPF16   : I<0x9D, RawFrm, (outs), (ins), "popf{w}", []>, OpSize;
+def POPF32   : I<0x9D, RawFrm, (outs), (ins), "popf{l|d}", []>,
+               Requires<[In32BitMode]>;
 }
 let Defs = [ESP], Uses = [ESP, EFLAGS], mayStore = 1, neverHasSideEffects=1 in {
-def PUSHF    : I<0x9C, RawFrm, (outs), (ins), "pushf{w}", []>, OpSize;
-def PUSHFD   : I<0x9C, RawFrm, (outs), (ins), "pushf{l}", []>;
+def PUSHF16  : I<0x9C, RawFrm, (outs), (ins), "pushf{w}", []>, OpSize;
+def PUSHF32  : I<0x9C, RawFrm, (outs), (ins), "pushf{l|d}", []>,
+               Requires<[In32BitMode]>;
 }
 
 let isTwoAddress = 1 in                               // GR32 = bswap GR32
@@ -867,7 +912,7 @@ def RDTSC : I<0x31, RawFrm, (outs), (ins), "rdtsc", [(X86rdtsc)]>,
 let Defs = [RAX, RCX, RDX] in
 def RDTSCP : I<0x01, MRM_F9, (outs), (ins), "rdtscp", []>, TB;
 
-let isBarrier = 1, hasCtrlDep = 1 in {
+let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in {
 def TRAP    : I<0x0B, RawFrm, (outs), (ins), "ud2", [(trap)]>, TB;
 }
 
@@ -966,36 +1011,47 @@ def MOV32mi : Ii32<0xC7, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src),
                    "mov{l}\t{$src, $dst|$dst, $src}",
                    [(store (i32 imm:$src), addr:$dst)]>;
 
-def MOV8o8a : Ii8 <0xA0, RawFrm, (outs), (ins offset8:$src),
+/// moffs8, moffs16 and moffs32 versions of moves.  The immediate is a
+/// 32-bit offset from the PC.  These are only valid in x86-32 mode.
+def MOV8o8a : Ii32 <0xA0, RawFrm, (outs), (ins offset8:$src),
                    "mov{b}\t{$src, %al|%al, $src}", []>;
-def MOV16o16a : Ii16 <0xA1, RawFrm, (outs), (ins offset16:$src),
+def MOV16o16a : Ii32 <0xA1, RawFrm, (outs), (ins offset16:$src),
                       "mov{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
 def MOV32o32a : Ii32 <0xA1, RawFrm, (outs), (ins offset32:$src),
                       "mov{l}\t{$src, %eax|%eax, $src}", []>;
-
-def MOV8ao8 : Ii8 <0xA2, RawFrm, (outs offset8:$dst), (ins),
+def MOV8ao8 : Ii32 <0xA2, RawFrm, (outs offset8:$dst), (ins),
                    "mov{b}\t{%al, $dst|$dst, %al}", []>;
-def MOV16ao16 : Ii16 <0xA3, RawFrm, (outs offset16:$dst), (ins),
+def MOV16ao16 : Ii32 <0xA3, RawFrm, (outs offset16:$dst), (ins),
                       "mov{w}\t{%ax, $dst|$dst, %ax}", []>, OpSize;
 def MOV32ao32 : Ii32 <0xA3, RawFrm, (outs offset32:$dst), (ins),
                       "mov{l}\t{%eax, $dst|$dst, %eax}", []>;
-
+                      
 // Moves to and from segment registers
 def MOV16rs : I<0x8C, MRMDestReg, (outs GR16:$dst), (ins SEGMENT_REG:$src),
-                "mov{w}\t{$src, $dst|$dst, $src}", []>;
+                "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
+def MOV32rs : I<0x8C, MRMDestReg, (outs GR32:$dst), (ins SEGMENT_REG:$src),
+                "mov{l}\t{$src, $dst|$dst, $src}", []>;
 def MOV16ms : I<0x8C, MRMDestMem, (outs i16mem:$dst), (ins SEGMENT_REG:$src),
-                "mov{w}\t{$src, $dst|$dst, $src}", []>;
+                "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
+def MOV32ms : I<0x8C, MRMDestMem, (outs i32mem:$dst), (ins SEGMENT_REG:$src),
+                "mov{l}\t{$src, $dst|$dst, $src}", []>;
 def MOV16sr : I<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR16:$src),
-                "mov{w}\t{$src, $dst|$dst, $src}", []>;
+                "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
+def MOV32sr : I<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR32:$src),
+                "mov{l}\t{$src, $dst|$dst, $src}", []>;
 def MOV16sm : I<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i16mem:$src),
-                "mov{w}\t{$src, $dst|$dst, $src}", []>;
+                "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
+def MOV32sm : I<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i32mem:$src),
+                "mov{l}\t{$src, $dst|$dst, $src}", []>;
 
+let isCodeGenOnly = 1 in {
 def MOV8rr_REV : I<0x8A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src),
                    "mov{b}\t{$src, $dst|$dst, $src}", []>;
 def MOV16rr_REV : I<0x8B, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
                     "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
 def MOV32rr_REV : I<0x8B, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
                     "mov{l}\t{$src, $dst|$dst, $src}", []>;
+}
 
 let canFoldAsLoad = 1, isReMaterializable = 1 in {
 def MOV8rm  : I<0x8A, MRMSrcMem, (outs GR8 :$dst), (ins i8mem :$src),
@@ -1059,10 +1115,10 @@ def MOV32dr : I<0x23, MRMSrcReg, (outs DEBUG_REG:$dst), (ins GR32:$src),
                 "mov{l}\t{$src, $dst|$dst, $src}", []>, TB;
                 
 // Moves to and from control registers
-def MOV32rc : I<0x20, MRMDestReg, (outs GR32:$dst), (ins CONTROL_REG_32:$src),
-                "mov{q}\t{$src, $dst|$dst, $src}", []>, TB;
-def MOV32cr : I<0x22, MRMSrcReg, (outs CONTROL_REG_32:$dst), (ins GR32:$src),
-                "mov{q}\t{$src, $dst|$dst, $src}", []>, TB;
+def MOV32rc : I<0x20, MRMDestReg, (outs GR32:$dst), (ins CONTROL_REG:$src),
+                "mov{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def MOV32cr : I<0x22, MRMSrcReg, (outs CONTROL_REG:$dst), (ins GR32:$src),
+                "mov{l}\t{$src, $dst|$dst, $src}", []>, TB;
 
 //===----------------------------------------------------------------------===//
 //  Fixed-Register Multiplication and Division Instructions...
@@ -1746,6 +1802,7 @@ def AND32rr : I<0x21, MRMDestReg,
 
 // AND instructions with the destination register in REG and the source register
 //   in R/M.  Included for the disassembler.
+let isCodeGenOnly = 1 in {
 def AND8rr_REV : I<0x22, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
                   "and{b}\t{$src2, $dst|$dst, $src2}", []>;
 def AND16rr_REV : I<0x23, MRMSrcReg, (outs GR16:$dst), 
@@ -1754,6 +1811,7 @@ def AND16rr_REV : I<0x23, MRMSrcReg, (outs GR16:$dst),
 def AND32rr_REV : I<0x23, MRMSrcReg, (outs GR32:$dst), 
                     (ins GR32:$src1, GR32:$src2),
                    "and{l}\t{$src2, $dst|$dst, $src2}", []>;
+}
 
 def AND8rm   : I<0x22, MRMSrcMem, 
                  (outs GR8 :$dst), (ins GR8 :$src1, i8mem :$src2),
@@ -1872,6 +1930,7 @@ def OR32rr   : I<0x09, MRMDestReg, (outs GR32:$dst),
 
 // OR instructions with the destination register in REG and the source register
 //   in R/M.  Included for the disassembler.
+let isCodeGenOnly = 1 in {
 def OR8rr_REV : I<0x0A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
                   "or{b}\t{$src2, $dst|$dst, $src2}", []>;
 def OR16rr_REV : I<0x0B, MRMSrcReg, (outs GR16:$dst),
@@ -1880,6 +1939,7 @@ def OR16rr_REV : I<0x0B, MRMSrcReg, (outs GR16:$dst),
 def OR32rr_REV : I<0x0B, MRMSrcReg, (outs GR32:$dst), 
                    (ins GR32:$src1, GR32:$src2),
                    "or{l}\t{$src2, $dst|$dst, $src2}", []>;
+}
                   
 def OR8rm    : I<0x0A, MRMSrcMem, (outs GR8 :$dst), 
                  (ins GR8 :$src1, i8mem :$src2),
@@ -1988,6 +2048,7 @@ let isCommutable = 1 in { // X = XOR Y, Z --> X = XOR Z, Y
 
 // XOR instructions with the destination register in REG and the source register
 //   in R/M.  Included for the disassembler.
+let isCodeGenOnly = 1 in {
 def XOR8rr_REV : I<0x32, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
                   "xor{b}\t{$src2, $dst|$dst, $src2}", []>;
 def XOR16rr_REV : I<0x33, MRMSrcReg, (outs GR16:$dst), 
@@ -1996,6 +2057,7 @@ def XOR16rr_REV : I<0x33, MRMSrcReg, (outs GR16:$dst),
 def XOR32rr_REV : I<0x33, MRMSrcReg, (outs GR32:$dst), 
                     (ins GR32:$src1, GR32:$src2),
                    "xor{l}\t{$src2, $dst|$dst, $src2}", []>;
+}
 
 def XOR8rm   : I<0x32, MRMSrcMem, 
                  (outs GR8 :$dst), (ins GR8:$src1, i8mem :$src2), 
@@ -2793,6 +2855,7 @@ def ADC32rr  : I<0x11, MRMDestReg, (outs GR32:$dst),
                  [(set GR32:$dst, (adde GR32:$src1, GR32:$src2))]>;
 }
 
+let isCodeGenOnly = 1 in {
 def ADC8rr_REV : I<0x12, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
                  "adc{b}\t{$src2, $dst|$dst, $src2}", []>;
 def ADC16rr_REV : I<0x13, MRMSrcReg, (outs GR16:$dst), 
@@ -2801,6 +2864,7 @@ def ADC16rr_REV : I<0x13, MRMSrcReg, (outs GR16:$dst),
 def ADC32rr_REV : I<0x13, MRMSrcReg, (outs GR32:$dst), 
                     (ins GR32:$src1, GR32:$src2),
                     "adc{l}\t{$src2, $dst|$dst, $src2}", []>;
+}
 
 def ADC8rm   : I<0x12, MRMSrcMem , (outs GR8:$dst), 
                                    (ins GR8:$src1, i8mem:$src2),
@@ -2888,6 +2952,7 @@ def SUB32rr : I<0x29, MRMDestReg, (outs GR32:$dst), (ins GR32:$src1,GR32:$src2),
                 [(set GR32:$dst, EFLAGS,
                       (X86sub_flag GR32:$src1, GR32:$src2))]>;
 
+let isCodeGenOnly = 1 in {
 def SUB8rr_REV : I<0x2A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
                    "sub{b}\t{$src2, $dst|$dst, $src2}", []>;
 def SUB16rr_REV : I<0x2B, MRMSrcReg, (outs GR16:$dst), 
@@ -2896,6 +2961,7 @@ def SUB16rr_REV : I<0x2B, MRMSrcReg, (outs GR16:$dst),
 def SUB32rr_REV : I<0x2B, MRMSrcReg, (outs GR32:$dst), 
                     (ins GR32:$src1, GR32:$src2),
                     "sub{l}\t{$src2, $dst|$dst, $src2}", []>;
+}
 
 // Register-Memory Subtraction
 def SUB8rm  : I<0x2A, MRMSrcMem, (outs GR8 :$dst),
@@ -3039,6 +3105,7 @@ let isTwoAddress = 0 in {
                       "sbb{l}\t{$src, %eax|%eax, $src}", []>;
 }
 
+let isCodeGenOnly = 1 in {
 def SBB8rr_REV : I<0x1A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
                    "sbb{b}\t{$src2, $dst|$dst, $src2}", []>;
 def SBB16rr_REV : I<0x1B, MRMSrcReg, (outs GR16:$dst), 
@@ -3047,6 +3114,7 @@ def SBB16rr_REV : I<0x1B, MRMSrcReg, (outs GR16:$dst),
 def SBB32rr_REV : I<0x1B, MRMSrcReg, (outs GR32:$dst), 
                     (ins GR32:$src1, GR32:$src2),
                     "sbb{l}\t{$src2, $dst|$dst, $src2}", []>;
+}
 
 def SBB8rm   : I<0x1A, MRMSrcMem, (outs GR8:$dst), (ins GR8:$src1, i8mem:$src2),
                     "sbb{b}\t{$src2, $dst|$dst, $src2}",
@@ -3864,12 +3932,14 @@ def XADD16rr : I<0xC1, MRMDestReg, (outs GR16:$dst), (ins GR16:$src),
 def XADD32rr  : I<0xC1, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
                  "xadd{l}\t{$src, $dst|$dst, $src}", []>, TB;
 
+let mayLoad = 1, mayStore = 1 in {
 def XADD8rm   : I<0xC0, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src),
                  "xadd{b}\t{$src, $dst|$dst, $src}", []>, TB;
 def XADD16rm  : I<0xC1, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
                  "xadd{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
 def XADD32rm  : I<0xC1, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
                  "xadd{l}\t{$src, $dst|$dst, $src}", []>, TB;
+}
 
 def CMPXCHG8rr : I<0xB0, MRMDestReg, (outs GR8:$dst), (ins GR8:$src),
                    "cmpxchg{b}\t{$src, $dst|$dst, $src}", []>, TB;
@@ -3878,12 +3948,14 @@ def CMPXCHG16rr : I<0xB1, MRMDestReg, (outs GR16:$dst), (ins GR16:$src),
 def CMPXCHG32rr  : I<0xB1, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
                      "cmpxchg{l}\t{$src, $dst|$dst, $src}", []>, TB;
 
+let mayLoad = 1, mayStore = 1 in {
 def CMPXCHG8rm   : I<0xB0, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src),
                      "cmpxchg{b}\t{$src, $dst|$dst, $src}", []>, TB;
 def CMPXCHG16rm  : I<0xB1, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
                      "cmpxchg{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
 def CMPXCHG32rm  : I<0xB1, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
                      "cmpxchg{l}\t{$src, $dst|$dst, $src}", []>, TB;
+}
 
 let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX] in
 def CMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$dst),
@@ -3891,7 +3963,7 @@ def CMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$dst),
 
 // Optimized codegen when the non-memory output is not used.
 // FIXME: Use normal add / sub instructions and add lock prefix dynamically.
-let Defs = [EFLAGS] in {
+let Defs = [EFLAGS], mayLoad = 1, mayStore = 1 in {
 def LOCK_ADD8mr  : I<0x00, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2),
                     "lock\n\t"
                     "add{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
@@ -4453,7 +4525,7 @@ def : Pat<(i32 (anyext GR8 :$src)), (MOVZX32rr8  GR8 :$src)>;
 
 // Except for i16 -> i32 since isel expect i16 ops to be promoted to i32.
 def : Pat<(i32 (anyext GR16:$src)),
-          (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR16:$src, x86_subreg_16bit)>;
+          (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR16:$src, sub_16bit)>;
 
 
 //===----------------------------------------------------------------------===//
@@ -4473,81 +4545,81 @@ def : Pat<(store (add (loadi32 addr:$dst), 128), addr:$dst),
 
 // r & (2^16-1) ==> movz
 def : Pat<(and GR32:$src1, 0xffff),
-          (MOVZX32rr16 (EXTRACT_SUBREG GR32:$src1, x86_subreg_16bit))>;
+          (MOVZX32rr16 (EXTRACT_SUBREG GR32:$src1, sub_16bit))>;
 // r & (2^8-1) ==> movz
 def : Pat<(and GR32:$src1, 0xff),
           (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src1, 
                                                              GR32_ABCD)),
-                                      x86_subreg_8bit))>,
+                                      sub_8bit))>,
       Requires<[In32BitMode]>;
 // r & (2^8-1) ==> movz
 def : Pat<(and GR16:$src1, 0xff),
           (MOVZX16rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src1, 
                                                              GR16_ABCD)),
-                                      x86_subreg_8bit))>,
+                                      sub_8bit))>,
       Requires<[In32BitMode]>;
 
 // sext_inreg patterns
 def : Pat<(sext_inreg GR32:$src, i16),
-          (MOVSX32rr16 (EXTRACT_SUBREG GR32:$src, x86_subreg_16bit))>;
+          (MOVSX32rr16 (EXTRACT_SUBREG GR32:$src, sub_16bit))>;
 def : Pat<(sext_inreg GR32:$src, i8),
           (MOVSX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, 
                                                              GR32_ABCD)),
-                                      x86_subreg_8bit))>,
+                                      sub_8bit))>,
       Requires<[In32BitMode]>;
 def : Pat<(sext_inreg GR16:$src, i8),
           (MOVSX16rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, 
                                                              GR16_ABCD)),
-                                      x86_subreg_8bit))>,
+                                      sub_8bit))>,
       Requires<[In32BitMode]>;
 
 // trunc patterns
 def : Pat<(i16 (trunc GR32:$src)),
-          (EXTRACT_SUBREG GR32:$src, x86_subreg_16bit)>;
+          (EXTRACT_SUBREG GR32:$src, sub_16bit)>;
 def : Pat<(i8 (trunc GR32:$src)),
           (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
-                          x86_subreg_8bit)>,
+                          sub_8bit)>,
       Requires<[In32BitMode]>;
 def : Pat<(i8 (trunc GR16:$src)),
           (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
-                          x86_subreg_8bit)>,
+                          sub_8bit)>,
       Requires<[In32BitMode]>;
 
 // h-register tricks
 def : Pat<(i8 (trunc (srl_su GR16:$src, (i8 8)))),
           (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
-                          x86_subreg_8bit_hi)>,
+                          sub_8bit_hi)>,
       Requires<[In32BitMode]>;
 def : Pat<(i8 (trunc (srl_su GR32:$src, (i8 8)))),
           (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
-                          x86_subreg_8bit_hi)>,
+                          sub_8bit_hi)>,
       Requires<[In32BitMode]>;
 def : Pat<(srl GR16:$src, (i8 8)),
           (EXTRACT_SUBREG
             (MOVZX32rr8
               (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
-                              x86_subreg_8bit_hi)),
-            x86_subreg_16bit)>,
+                              sub_8bit_hi)),
+            sub_16bit)>,
       Requires<[In32BitMode]>;
 def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))),
           (MOVZX32rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, 
                                                              GR16_ABCD)),
-                                      x86_subreg_8bit_hi))>,
+                                      sub_8bit_hi))>,
       Requires<[In32BitMode]>;
 def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))),
           (MOVZX32rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, 
                                                              GR16_ABCD)),
-                                      x86_subreg_8bit_hi))>,
+                                      sub_8bit_hi))>,
       Requires<[In32BitMode]>;
 def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)),
           (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, 
                                                              GR32_ABCD)),
-                                      x86_subreg_8bit_hi))>,
+                                      sub_8bit_hi))>,
       Requires<[In32BitMode]>;
 def : Pat<(srl (and_su GR32:$src, 0xff00), (i8 8)),
           (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, 
                                                              GR32_ABCD)),
-                                      x86_subreg_8bit_hi))>,
+                                      sub_8bit_hi))>,
       Requires<[In32BitMode]>;
 
 // (shl x, 1) ==> (add x, x)
diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td
index 744af50..0952fc8 100644
--- a/lib/Target/X86/X86InstrMMX.td
+++ b/lib/Target/X86/X86InstrMMX.td
@@ -117,9 +117,6 @@ def MMX_MOVD64mr : MMXI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR64:$src),
                         "movd\t{$src, $dst|$dst, $src}", []>;
 def MMX_MOVD64grr : MMXI<0x7E, MRMDestReg, (outs), (ins GR32:$dst, VR64:$src),
                         "movd\t{$src, $dst|$dst, $src}", []>;
-def MMX_MOVQ64gmr : MMXRI<0x7F, MRMDestMem, (outs), 
-                         (ins i64mem:$dst, VR64:$src),
-                         "movq\t{$src, $dst|$dst, $src}", []>;
 
 let neverHasSideEffects = 1 in
 def MMX_MOVD64to64rr : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src),
@@ -133,10 +130,10 @@ let neverHasSideEffects = 1 in
 def MMX_MOVD64from64rr : MMXRI<0x7E, MRMDestReg,
                                (outs GR64:$dst), (ins VR64:$src),
                                "movd\t{$src, $dst|$dst, $src}", []>;
-def MMX_MOVD64rrv164 : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src),
-                            "movd\t{$src, $dst|$dst, $src}",
-                            [(set VR64:$dst,
-                             (v1i64 (scalar_to_vector GR64:$src)))]>;
+def MMX_MOVD64rrv164 : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src),
+                             "movd\t{$src, $dst|$dst, $src}",
+                             [(set VR64:$dst,
+                              (v1i64 (scalar_to_vector GR64:$src)))]>;
 
 let neverHasSideEffects = 1 in
 def MMX_MOVQ64rr : MMXI<0x6F, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src),
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 2129580..5580ba7 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -117,17 +117,17 @@ def alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
   return cast<LoadSDNode>(N)->getAlignment() >= 16;
 }]>;
 
-def alignedloadfsf32 : PatFrag<(ops node:$ptr), 
+def alignedloadfsf32 : PatFrag<(ops node:$ptr),
                                (f32 (alignedload node:$ptr))>;
-def alignedloadfsf64 : PatFrag<(ops node:$ptr), 
+def alignedloadfsf64 : PatFrag<(ops node:$ptr),
                                (f64 (alignedload node:$ptr))>;
-def alignedloadv4f32 : PatFrag<(ops node:$ptr), 
+def alignedloadv4f32 : PatFrag<(ops node:$ptr),
                                (v4f32 (alignedload node:$ptr))>;
-def alignedloadv2f64 : PatFrag<(ops node:$ptr), 
+def alignedloadv2f64 : PatFrag<(ops node:$ptr),
                                (v2f64 (alignedload node:$ptr))>;
-def alignedloadv4i32 : PatFrag<(ops node:$ptr), 
+def alignedloadv4i32 : PatFrag<(ops node:$ptr),
                                (v4i32 (alignedload node:$ptr))>;
-def alignedloadv2i64 : PatFrag<(ops node:$ptr), 
+def alignedloadv2i64 : PatFrag<(ops node:$ptr),
                                (v2i64 (alignedload node:$ptr))>;
 
 // Like 'load', but uses special alignment checks suitable for use in
@@ -387,11 +387,11 @@ def MOVSSrr : SSI<0x10, MRMSrcReg,
 let AddedComplexity = 15 in
 def : Pat<(v4f32 (movl VR128:$src1, VR128:$src2)),
           (MOVSSrr (v4f32 VR128:$src1),
-                   (EXTRACT_SUBREG (v4f32 VR128:$src2), x86_subreg_ss))>;
+                   (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>;
 
 // Implicitly promote a 32-bit scalar to a vector.
 def : Pat<(v4f32 (scalar_to_vector FR32:$src)),
-          (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, x86_subreg_ss)>;
+          (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss)>;
 
 // Loading from memory automatically zeroing upper bits.
 let canFoldAsLoad = 1, isReMaterializable = 1 in
@@ -403,11 +403,11 @@ def MOVSSrm : SSI<0x10, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
 // with SUBREG_TO_REG.
 let AddedComplexity = 20 in {
 def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
-          (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), x86_subreg_ss)>;
+          (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>;
 def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
-          (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), x86_subreg_ss)>;
+          (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>;
 def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
-          (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), x86_subreg_ss)>;
+          (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>;
 }
 
 // Store scalar value to memory.
@@ -419,7 +419,7 @@ def MOVSSmr : SSI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src),
 def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
                  addr:$dst),
           (MOVSSmr addr:$dst,
-                   (EXTRACT_SUBREG (v4f32 VR128:$src), x86_subreg_ss))>;
+                   (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
 
 // Conversion instructions
 def CVTTSS2SIrr : SSI<0x2C, MRMSrcReg, (outs GR32:$dst), (ins FR32:$src),
@@ -449,7 +449,7 @@ def Int_CVTSS2SIrm : SSI<0x2D, MRMSrcMem, (outs GR32:$dst), (ins f32mem:$src),
                          [(set GR32:$dst, (int_x86_sse_cvtss2si
                                            (load addr:$src)))]>;
 
-// Match intrinisics which expect MM and XMM operand(s).
+// Match intrinsics which expect MM and XMM operand(s).
 def Int_CVTPS2PIrr : PSI<0x2D, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
                          "cvtps2pi\t{$src, $dst|$dst, $src}",
                          [(set VR64:$dst, (int_x86_sse_cvtps2pi VR128:$src))]>;
@@ -509,6 +509,17 @@ let mayLoad = 1 in
   def CMPSSrm : SSIi8<0xC2, MRMSrcMem,
                     (outs FR32:$dst), (ins FR32:$src1, f32mem:$src, SSECC:$cc),
                     "cmp${cc}ss\t{$src, $dst|$dst, $src}", []>;
+
+  // Accept explicit immediate argument form instead of comparison code.
+let isAsmParserOnly = 1 in {
+  def CMPSSrr_alt : SSIi8<0xC2, MRMSrcReg,
+                    (outs FR32:$dst), (ins FR32:$src1, FR32:$src, i8imm:$src2),
+                    "cmpss\t{$src2, $src, $dst|$dst, $src, $src2}", []>;
+let mayLoad = 1 in
+  def CMPSSrm_alt : SSIi8<0xC2, MRMSrcMem,
+                    (outs FR32:$dst), (ins FR32:$src1, f32mem:$src, i8imm:$src2),
+                    "cmpss\t{$src2, $src, $dst|$dst, $src, $src2}", []>;
+}
 }
 
 let Defs = [EFLAGS] in {
@@ -518,25 +529,25 @@ def UCOMISSrr: PSI<0x2E, MRMSrcReg, (outs), (ins FR32:$src1, FR32:$src2),
 def UCOMISSrm: PSI<0x2E, MRMSrcMem, (outs), (ins FR32:$src1, f32mem:$src2),
                    "ucomiss\t{$src2, $src1|$src1, $src2}",
                    [(set EFLAGS, (X86cmp FR32:$src1, (loadf32 addr:$src2)))]>;
-                    
+
 def COMISSrr: PSI<0x2F, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
                   "comiss\t{$src2, $src1|$src1, $src2}", []>;
 def COMISSrm: PSI<0x2F, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
                   "comiss\t{$src2, $src1|$src1, $src2}", []>;
-                  
+
 } // Defs = [EFLAGS]
 
 // Aliases to match intrinsics which expect XMM operand(s).
 let Constraints = "$src1 = $dst" in {
   def Int_CMPSSrr : SSIi8<0xC2, MRMSrcReg,
-                        (outs VR128:$dst), 
+                        (outs VR128:$dst),
                         (ins VR128:$src1, VR128:$src, SSECC:$cc),
                         "cmp${cc}ss\t{$src, $dst|$dst, $src}",
-                        [(set VR128:$dst, (int_x86_sse_cmp_ss 
+                        [(set VR128:$dst, (int_x86_sse_cmp_ss
                                              VR128:$src1,
                                              VR128:$src, imm:$cc))]>;
   def Int_CMPSSrm : SSIi8<0xC2, MRMSrcMem,
-                        (outs VR128:$dst), 
+                        (outs VR128:$dst),
                         (ins VR128:$src1, f32mem:$src, SSECC:$cc),
                         "cmp${cc}ss\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst, (int_x86_sse_cmp_ss VR128:$src1,
@@ -1009,6 +1020,16 @@ let Constraints = "$src1 = $dst" in {
                   "cmp${cc}ps\t{$src, $dst|$dst, $src}",
                   [(set VR128:$dst, (int_x86_sse_cmp_ps VR128:$src1,
                                             (memop addr:$src), imm:$cc))]>;
+
+  // Accept explicit immediate argument form instead of comparison code.
+let isAsmParserOnly = 1 in {
+  def CMPPSrri_alt : PSIi8<0xC2, MRMSrcReg,
+                    (outs VR128:$dst), (ins VR128:$src1, VR128:$src, i8imm:$src2),
+                    "cmpps\t{$src2, $src, $dst|$dst, $src, $src}", []>;
+  def CMPPSrmi_alt : PSIi8<0xC2, MRMSrcMem,
+                  (outs VR128:$dst), (ins VR128:$src1, f128mem:$src, i8imm:$src2),
+                  "cmpps\t{$src2, $src, $dst|$dst, $src, $src}", []>;
+}
 }
 def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), VR128:$src2, imm:$cc)),
           (CMPPSrri (v4f32 VR128:$src1), (v4f32 VR128:$src2), imm:$cc)>;
@@ -1102,7 +1123,8 @@ def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
 }
 
 // Load, store, and memory fence
-def SFENCE : PSI<0xAE, MRM7r, (outs), (ins), "sfence", [(int_x86_sse_sfence)]>;
+def SFENCE : I<0xAE, MRM_F8, (outs), (ins), "sfence", [(int_x86_sse_sfence)]>,
+             TB, Requires<[HasSSE1]>;
 
 // MXCSR register
 def LDMXCSR : PSI<0xAE, MRM2m, (outs), (ins i32mem:$src),
@@ -1130,7 +1152,7 @@ def : Pat<(v8i16 immAllZerosV), (V_SET0PI)>;
 def : Pat<(v16i8 immAllZerosV), (V_SET0PI)>;
 
 def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
-          (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), x86_subreg_ss))>;
+          (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
 
 //===---------------------------------------------------------------------===//
 // SSE2 Instructions
@@ -1152,11 +1174,11 @@ def MOVSDrr : SDI<0x10, MRMSrcReg,
 let AddedComplexity = 15 in
 def : Pat<(v2f64 (movl VR128:$src1, VR128:$src2)),
           (MOVSDrr (v2f64 VR128:$src1),
-                   (EXTRACT_SUBREG (v2f64 VR128:$src2), x86_subreg_sd))>;
+                   (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>;
 
 // Implicitly promote a 64-bit scalar to a vector.
 def : Pat<(v2f64 (scalar_to_vector FR64:$src)),
-          (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, x86_subreg_sd)>;
+          (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd)>;
 
 // Loading from memory automatically zeroing upper bits.
 let canFoldAsLoad = 1, isReMaterializable = 1, AddedComplexity = 20 in
@@ -1168,15 +1190,15 @@ def MOVSDrm : SDI<0x10, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src),
 // with SUBREG_TO_REG.
 let AddedComplexity = 20 in {
 def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
-          (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), x86_subreg_sd)>;
+          (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
 def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
-          (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), x86_subreg_sd)>;
+          (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
 def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
-          (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), x86_subreg_sd)>;
+          (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
 def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
-          (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), x86_subreg_sd)>;
+          (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
 def : Pat<(v2f64 (X86vzload addr:$src)),
-          (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), x86_subreg_sd)>;
+          (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
 }
 
 // Store scalar value to memory.
@@ -1188,7 +1210,7 @@ def MOVSDmr : SDI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src),
 def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
                  addr:$dst),
           (MOVSDmr addr:$dst,
-                   (EXTRACT_SUBREG (v2f64 VR128:$src), x86_subreg_sd))>;
+                   (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>;
 
 // Conversion instructions
 def CVTTSD2SIrr : SDI<0x2C, MRMSrcReg, (outs GR32:$dst), (ins FR64:$src),
@@ -1255,7 +1277,7 @@ def Int_CVTSD2SIrm : SDI<0x2D, MRMSrcMem, (outs GR32:$dst), (ins f128mem:$src),
                          [(set GR32:$dst, (int_x86_sse2_cvtsd2si
                                            (load addr:$src)))]>;
 
-// Match intrinisics which expect MM and XMM operand(s).
+// Match intrinsics which expect MM and XMM operand(s).
 def Int_CVTPD2PIrr : PDI<0x2D, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
                          "cvtpd2pi\t{$src, $dst|$dst, $src}",
                          [(set VR64:$dst, (int_x86_sse_cvtpd2pi VR128:$src))]>;
@@ -1297,6 +1319,17 @@ let mayLoad = 1 in
   def CMPSDrm : SDIi8<0xC2, MRMSrcMem,
                     (outs FR64:$dst), (ins FR64:$src1, f64mem:$src, SSECC:$cc),
                     "cmp${cc}sd\t{$src, $dst|$dst, $src}", []>;
+
+  // Accept explicit immediate argument form instead of comparison code.
+let isAsmParserOnly = 1 in {
+  def CMPSDrr_alt : SDIi8<0xC2, MRMSrcReg,
+                    (outs FR64:$dst), (ins FR64:$src1, FR64:$src, i8imm:$src2),
+                    "cmpsd\t{$src2, $src, $dst|$dst, $src, $src2}", []>;
+let mayLoad = 1 in
+  def CMPSDrm_alt : SDIi8<0xC2, MRMSrcMem,
+                    (outs FR64:$dst), (ins FR64:$src1, f64mem:$src, i8imm:$src2),
+                    "cmpsd\t{$src2, $src, $dst|$dst, $src, $src2}", []>;
+}
 }
 
 let Defs = [EFLAGS] in {
@@ -1311,13 +1344,13 @@ def UCOMISDrm: PDI<0x2E, MRMSrcMem, (outs), (ins FR64:$src1, f64mem:$src2),
 // Aliases to match intrinsics which expect XMM operand(s).
 let Constraints = "$src1 = $dst" in {
   def Int_CMPSDrr : SDIi8<0xC2, MRMSrcReg,
-                        (outs VR128:$dst), 
+                        (outs VR128:$dst),
                         (ins VR128:$src1, VR128:$src, SSECC:$cc),
                         "cmp${cc}sd\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst, (int_x86_sse2_cmp_sd VR128:$src1,
                                            VR128:$src, imm:$cc))]>;
   def Int_CMPSDrm : SDIi8<0xC2, MRMSrcMem,
-                        (outs VR128:$dst), 
+                        (outs VR128:$dst),
                         (ins VR128:$src1, f64mem:$src, SSECC:$cc),
                         "cmp${cc}sd\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst, (int_x86_sse2_cmp_sd VR128:$src1,
@@ -1655,7 +1688,7 @@ def CVTTPS2DQrm : SSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
 
 def Int_CVTTPS2DQrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                         "cvttps2dq\t{$src, $dst|$dst, $src}",
-                        [(set VR128:$dst, 
+                        [(set VR128:$dst,
                               (int_x86_sse2_cvttps2dq VR128:$src))]>,
                       XS, Requires<[HasSSE2]>;
 def Int_CVTTPS2DQrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
@@ -1890,6 +1923,16 @@ let Constraints = "$src1 = $dst" in {
                   "cmp${cc}pd\t{$src, $dst|$dst, $src}",
                   [(set VR128:$dst, (int_x86_sse2_cmp_pd VR128:$src1,
                                                  (memop addr:$src), imm:$cc))]>;
+
+  // Accept explicit immediate argument form instead of comparison code.
+let isAsmParserOnly = 1 in {
+  def CMPPDrri_alt : PDIi8<0xC2, MRMSrcReg,
+                    (outs VR128:$dst), (ins VR128:$src1, VR128:$src, i8imm:$src2),
+                    "cmppd\t{$src2, $src, $dst|$dst, $src, $src2}", []>;
+  def CMPPDrmi_alt : PDIi8<0xC2, MRMSrcMem,
+                  (outs VR128:$dst), (ins VR128:$src1, f128mem:$src, i8imm:$src2),
+                  "cmppd\t{$src2, $src, $dst|$dst, $src, $src2}", []>;
+}
 }
 def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), VR128:$src2, imm:$cc)),
           (CMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>;
@@ -1980,24 +2023,24 @@ let Constraints = "$src1 = $dst" in {
 
 multiclass PDI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId,
                             bit Commutable = 0> {
-  def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst), 
+  def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
                                (ins VR128:$src1, VR128:$src2),
                !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]> {
     let isCommutable = Commutable;
   }
-  def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), 
+  def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst),
                                (ins VR128:$src1, i128mem:$src2),
                !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                [(set VR128:$dst, (IntId VR128:$src1,
-                                        (bitconvert (memopv2i64 
+                                        (bitconvert (memopv2i64
                                                      addr:$src2))))]>;
 }
 
 multiclass PDI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm,
                              string OpcodeStr,
                              Intrinsic IntId, Intrinsic IntId2> {
-  def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst), 
+  def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
                                (ins VR128:$src1, VR128:$src2),
                !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]>;
@@ -2006,7 +2049,7 @@ multiclass PDI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm,
                !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                [(set VR128:$dst, (IntId VR128:$src1,
                                       (bitconvert (memopv2i64 addr:$src2))))]>;
-  def ri : PDIi8<opc2, ImmForm, (outs VR128:$dst), 
+  def ri : PDIi8<opc2, ImmForm, (outs VR128:$dst),
                                 (ins VR128:$src1, i32i8imm:$src2),
                !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                [(set VR128:$dst, (IntId2 VR128:$src1, (i32 imm:$src2)))]>;
@@ -2015,13 +2058,13 @@ multiclass PDI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm,
 /// PDI_binop_rm - Simple SSE2 binary operator.
 multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                         ValueType OpVT, bit Commutable = 0> {
-  def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst), 
+  def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
                                (ins VR128:$src1, VR128:$src2),
                !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                [(set VR128:$dst, (OpVT (OpNode VR128:$src1, VR128:$src2)))]> {
     let isCommutable = Commutable;
   }
-  def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), 
+  def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst),
                                (ins VR128:$src1, i128mem:$src2),
                !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                [(set VR128:$dst, (OpVT (OpNode VR128:$src1,
@@ -2416,6 +2459,10 @@ def LFENCE : I<0xAE, MRM_E8, (outs), (ins),
 def MFENCE : I<0xAE, MRM_F0, (outs), (ins),
                "mfence", [(int_x86_sse2_mfence)]>, TB, Requires<[HasSSE2]>;
 
+// Pause. This "instruction" is encoded as "rep; nop", so even though it
+// was introduced with SSE2, it's backward compatible.
+def PAUSE : I<0x90, RawFrm, (outs), (ins), "pause", []>, REP;
+
 //TODO: custom lower this so as to never even generate the noop
 def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm),
            (i8 0)), (NOOP)>;
@@ -2462,7 +2509,7 @@ def MOVPQI2QImr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
                                     (iPTR 0))), addr:$dst)]>;
 
 def : Pat<(f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
-          (f64 (EXTRACT_SUBREG (v2f64 VR128:$src), x86_subreg_sd))>;
+          (f64 (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>;
 
 def MOVPDI2DIrr  : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
                        "movd\t{$src, $dst|$dst, $src}",
@@ -2903,7 +2950,7 @@ defm PMADDUBSW   : SS3I_binop_rm_int_8 <0x04, "pmaddubsw",
 defm PMULHRSW    : SS3I_binop_rm_int_16<0x0B, "pmulhrsw",
                                         int_x86_ssse3_pmul_hr_sw,
                                         int_x86_ssse3_pmul_hr_sw_128, 1>;
-                                        
+
 defm PSHUFB      : SS3I_binop_rm_int_8 <0x00, "pshufb",
                                         int_x86_ssse3_pshuf_b,
                                         int_x86_ssse3_pshuf_b_128>;
@@ -3042,10 +3089,10 @@ def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))),
           (MOVSSrr (v4f32 (V_SET0PS)), FR32:$src)>;
 def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
           (MOVSSrr (v4f32 (V_SET0PS)),
-                   (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), x86_subreg_ss)))>;
+                   (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)))>;
 def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
           (MOVSSrr (v4i32 (V_SET0PI)),
-                   (EXTRACT_SUBREG (v4i32 VR128:$src), x86_subreg_ss))>;
+                   (EXTRACT_SUBREG (v4i32 VR128:$src), sub_ss))>;
 }
 
 // Splat v2f64 / v2i64
@@ -3181,17 +3228,17 @@ let AddedComplexity = 15 in {
 // Setting the lowest element in the vector.
 def : Pat<(v4i32 (movl VR128:$src1, VR128:$src2)),
           (MOVSSrr (v4i32 VR128:$src1),
-                   (EXTRACT_SUBREG (v4i32 VR128:$src2), x86_subreg_ss))>;
+                   (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_ss))>;
 def : Pat<(v2i64 (movl VR128:$src1, VR128:$src2)),
           (MOVSDrr (v2i64 VR128:$src1),
-                   (EXTRACT_SUBREG (v2i64 VR128:$src2), x86_subreg_sd))>;
+                   (EXTRACT_SUBREG (v2i64 VR128:$src2), sub_sd))>;
 
 // vector_shuffle v1, v2 <4, 5, 2, 3> using movsd
 def : Pat<(v4f32 (movlp VR128:$src1, VR128:$src2)),
-          (MOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, x86_subreg_sd))>,
+          (MOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, sub_sd))>,
       Requires<[HasSSE2]>;
 def : Pat<(v4i32 (movlp VR128:$src1, VR128:$src2)),
-          (MOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, x86_subreg_sd))>,
+          (MOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, sub_sd))>,
       Requires<[HasSSE2]>;
 }
 
@@ -3464,14 +3511,14 @@ let Constraints = "$src1 = $dst" in {
 let Constraints = "$src1 = $dst" in {
 multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                         ValueType OpVT, bit Commutable = 0> {
-  def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), 
+  def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
                                  (ins VR128:$src1, VR128:$src2),
                !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                [(set VR128:$dst, (OpVT (OpNode VR128:$src1, VR128:$src2)))]>,
                OpSize {
     let isCommutable = Commutable;
   }
-  def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), 
+  def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
                                  (ins VR128:$src1, i128mem:$src2),
                !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                [(set VR128:$dst, (OpNode VR128:$src1,
@@ -3949,15 +3996,15 @@ let Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1 in {
 def PCMPESTRM128REG : SS42AI<0, Pseudo, (outs VR128:$dst),
   (ins VR128:$src1, VR128:$src3, i8imm:$src5),
   "#PCMPESTRM128rr PSEUDO!",
-  [(set VR128:$dst, 
-        (int_x86_sse42_pcmpestrm128 
+  [(set VR128:$dst,
+        (int_x86_sse42_pcmpestrm128
          VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5))]>, OpSize;
 
 def PCMPESTRM128MEM : SS42AI<0, Pseudo, (outs VR128:$dst),
   (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
   "#PCMPESTRM128rm PSEUDO!",
-  [(set VR128:$dst, (int_x86_sse42_pcmpestrm128 
-                     VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5))]>, 
+  [(set VR128:$dst, (int_x86_sse42_pcmpestrm128
+                     VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5))]>,
   OpSize;
 }
 
@@ -3972,7 +4019,7 @@ def PCMPESTRM128rm : SS42AI<0x60, MRMSrcMem, (outs),
 
 let Defs = [ECX, EFLAGS] in {
   multiclass SS42AI_pcmpistri<Intrinsic IntId128> {
-    def rr : SS42AI<0x63, MRMSrcReg, (outs), 
+    def rr : SS42AI<0x63, MRMSrcReg, (outs),
       (ins VR128:$src1, VR128:$src2, i8imm:$src3),
       "pcmpistri\t{$src3, $src2, $src1|$src1, $src2, $src3}",
       [(set ECX, (IntId128 VR128:$src1, VR128:$src2, imm:$src3)),
@@ -4003,7 +4050,7 @@ let Uses = [EAX, EDX] in {
     def rm : SS42AI<0x61, MRMSrcMem, (outs),
       (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
        "pcmpestri\t{$src5, $src3, $src1|$src1, $src3, $src5}",
-       [(set ECX, 
+       [(set ECX,
              (IntId128 VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5)),
         (implicit EFLAGS)]>, OpSize;
   }
@@ -4081,16 +4128,15 @@ def AESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst),
   OpSize;
 
 def AESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst),
-  (ins VR128:$src1, i32i8imm:$src2),
+  (ins VR128:$src1, i8imm:$src2),
   "aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
   [(set VR128:$dst,
     (int_x86_aesni_aeskeygenassist VR128:$src1, imm:$src2))]>,
   OpSize;
 def AESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst),
-  (ins i128mem:$src1, i32i8imm:$src2),
+  (ins i128mem:$src1, i8imm:$src2),
   "aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
   [(set VR128:$dst,
     (int_x86_aesni_aeskeygenassist (bitconvert (memopv2i64 addr:$src1)),
                                     imm:$src2))]>,
   OpSize;
-
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index a3e04b0..98975ea 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -37,7 +37,6 @@
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/STLExtras.h"
-#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ErrorHandling.h"
 using namespace llvm;
 
@@ -145,6 +144,19 @@ unsigned X86RegisterInfo::getX86RegNum(unsigned RegNo) {
   case X86::XMM7: case X86::XMM15: case X86::MM7:
     return 7;
 
+  case X86::ES:
+    return 0;
+  case X86::CS:
+    return 1;
+  case X86::SS:
+    return 2;
+  case X86::DS:
+    return 3;
+  case X86::FS:
+    return 4;
+  case X86::GS:
+    return 5;
+
   default:
     assert(isVirtualRegister(RegNo) && "Unknown physical register!");
     llvm_unreachable("Register allocator hasn't allocated reg correctly yet!");
@@ -158,8 +170,7 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
                                           unsigned SubIdx) const {
   switch (SubIdx) {
   default: return 0;
-  case 1:
-    // 8-bit
+  case X86::sub_8bit:
     if (B == &X86::GR8RegClass) {
       if (A->getSize() == 2 || A->getSize() == 4 || A->getSize() == 8)
         return A;
@@ -191,12 +202,9 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
         return &X86::GR16_NOREXRegClass;
       else if (A == &X86::GR16_ABCDRegClass)
         return &X86::GR16_ABCDRegClass;
-    } else if (B == &X86::FR32RegClass) {
-      return A;
     }
     break;
-  case 2:
-    // 8-bit hi
+  case X86::sub_8bit_hi:
     if (B == &X86::GR8_ABCD_HRegClass) {
       if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass ||
           A == &X86::GR64_NOREXRegClass ||
@@ -209,12 +217,9 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
       else if (A == &X86::GR16RegClass || A == &X86::GR16_ABCDRegClass ||
                A == &X86::GR16_NOREXRegClass)
         return &X86::GR16_ABCDRegClass;
-    } else if (B == &X86::FR64RegClass) {
-      return A;
     }
     break;
-  case 3:
-    // 16-bit
+  case X86::sub_16bit:
     if (B == &X86::GR16RegClass) {
       if (A->getSize() == 4 || A->getSize() == 8)
         return A;
@@ -238,12 +243,9 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
         return &X86::GR32_NOREXRegClass;
       else if (A == &X86::GR32_ABCDRegClass)
         return &X86::GR64_ABCDRegClass;
-    } else if (B == &X86::VR128RegClass) {
-      return A;
     }
     break;
-  case 4:
-    // 32-bit
+  case X86::sub_32bit:
     if (B == &X86::GR32RegClass || B == &X86::GR32_NOSPRegClass) {
       if (A->getSize() == 8)
         return A;
@@ -261,6 +263,18 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
         return &X86::GR64_ABCDRegClass;
     }
     break;
+  case X86::sub_ss:
+    if (B == &X86::FR32RegClass)
+      return A;
+    break;
+  case X86::sub_sd:
+    if (B == &X86::FR64RegClass)
+      return A;
+    break;
+  case X86::sub_xmm:
+    if (B == &X86::VR128RegClass)
+      return A;
+    break;
   }
   return 0;
 }
@@ -518,6 +532,30 @@ X86RegisterInfo::getFrameIndexOffset(const MachineFunction &MF, int FI) const {
   return Offset;
 }
 
+static unsigned getSUBriOpcode(unsigned is64Bit, int64_t Imm) {
+  if (is64Bit) {
+    if (isInt<8>(Imm))
+      return X86::SUB64ri8;
+    return X86::SUB64ri32;
+  } else {
+    if (isInt<8>(Imm))
+      return X86::SUB32ri8;
+    return X86::SUB32ri;
+  }
+}
+
+static unsigned getADDriOpcode(unsigned is64Bit, int64_t Imm) {
+  if (is64Bit) {
+    if (isInt<8>(Imm))
+      return X86::ADD64ri8;
+    return X86::ADD64ri32;
+  } else {
+    if (isInt<8>(Imm))
+      return X86::ADD32ri8;
+    return X86::ADD32ri;
+  }
+}
+
 void X86RegisterInfo::
 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator I) const {
@@ -537,7 +575,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
       MachineInstr *New = 0;
       if (Old->getOpcode() == getCallFrameSetupOpcode()) {
         New = BuildMI(MF, Old->getDebugLoc(),
-                      TII.get(Is64Bit ? X86::SUB64ri32 : X86::SUB32ri),
+                      TII.get(getSUBriOpcode(Is64Bit, Amount)),
                       StackPtr)
           .addReg(StackPtr)
           .addImm(Amount);
@@ -549,9 +587,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
         Amount -= CalleeAmt;
   
       if (Amount) {
-          unsigned Opc = (Amount < 128) ?
-            (Is64Bit ? X86::ADD64ri8 : X86::ADD32ri8) :
-            (Is64Bit ? X86::ADD64ri32 : X86::ADD32ri);
+          unsigned Opc = getADDriOpcode(Is64Bit, Amount);
           New = BuildMI(MF, Old->getDebugLoc(), TII.get(Opc), StackPtr)
             .addReg(StackPtr)
             .addImm(Amount);
@@ -571,9 +607,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
     // something off the stack pointer, add it back.  We do this until we have
     // more advanced stack pointer tracking ability.
     if (uint64_t CalleeAmt = I->getOperand(1).getImm()) {
-      unsigned Opc = (CalleeAmt < 128) ?
-        (Is64Bit ? X86::SUB64ri8 : X86::SUB32ri8) :
-        (Is64Bit ? X86::SUB64ri32 : X86::SUB32ri);
+      unsigned Opc = getSUBriOpcode(Is64Bit, CalleeAmt);
       MachineInstr *Old = I;
       MachineInstr *New =
         BuildMI(MF, Old->getDebugLoc(), TII.get(Opc), 
@@ -691,13 +725,9 @@ void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
                   const TargetInstrInfo &TII) {
   bool isSub = NumBytes < 0;
   uint64_t Offset = isSub ? -NumBytes : NumBytes;
-  unsigned Opc = isSub
-    ? ((Offset < 128) ?
-       (Is64Bit ? X86::SUB64ri8 : X86::SUB32ri8) :
-       (Is64Bit ? X86::SUB64ri32 : X86::SUB32ri))
-    : ((Offset < 128) ?
-       (Is64Bit ? X86::ADD64ri8 : X86::ADD32ri8) :
-       (Is64Bit ? X86::ADD64ri32 : X86::ADD32ri));
+  unsigned Opc = isSub ?
+    getSUBriOpcode(Is64Bit, Offset) :
+    getADDriOpcode(Is64Bit, Offset);
   uint64_t Chunk = (1LL << 31) - 1;
   DebugLoc DL = MBB.findDebugLoc(MBBI);
 
@@ -899,7 +929,7 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
   if (Is64Bit && !Fn->hasFnAttr(Attribute::NoRedZone) &&
       !needsStackRealignment(MF) &&
       !MFI->hasVarSizedObjects() &&                // No dynamic alloca.
-      !MFI->hasCalls() &&                          // No calls.
+      !MFI->adjustsStack() &&                      // No calls.
       !Subtarget->isTargetWin64()) {               // Win64 has no Red Zone
     uint64_t MinSize = X86FI->getCalleeSavedFrameSize();
     if (HasFP) MinSize += SlotSize;
@@ -917,7 +947,8 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
   // size is bigger than the callers.
   if (TailCallReturnAddrDelta < 0) {
     MachineInstr *MI =
-      BuildMI(MBB, MBBI, DL, TII.get(Is64Bit? X86::SUB64ri32 : X86::SUB32ri),
+      BuildMI(MBB, MBBI, DL,
+              TII.get(getSUBriOpcode(Is64Bit, -TailCallReturnAddrDelta)),
               StackPtr)
         .addReg(StackPtr)
         .addImm(-TailCallReturnAddrDelta);
@@ -1307,7 +1338,7 @@ X86RegisterInfo::getInitialFrameState(std::vector<MachineMove> &Moves) const {
   // Calculate amount of bytes used for return address storing
   int stackGrowth = (Is64Bit ? -8 : -4);
 
-  // Initial state of the frame pointer is esp+4.
+  // Initial state of the frame pointer is esp+stackGrowth.
   MachineLocation Dst(MachineLocation::VirtualFP);
   MachineLocation Src(StackPtr, stackGrowth);
   Moves.push_back(MachineMove(0, Dst, Src));
diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h
index ac96c4c..d0b82e2 100644
--- a/lib/Target/X86/X86RegisterInfo.h
+++ b/lib/Target/X86/X86RegisterInfo.h
@@ -30,16 +30,6 @@ namespace N86 {
   };
 }
 
-namespace X86 {
-  /// SubregIndex - The index of various sized subregister classes. Note that 
-  /// these indices must be kept in sync with the class indices in the 
-  /// X86RegisterInfo.td file.
-  enum SubregIndex {
-    SUBREG_8BIT = 1, SUBREG_8BIT_HI = 2, SUBREG_16BIT = 3, SUBREG_32BIT = 4,
-    SUBREG_SS = 1, SUBREG_SD = 2, SUBREG_XMM = 3
-  };
-}
-
 /// DWARFFlavour - Flavour of dwarf regnumbers
 ///
 namespace DWARFFlavour {
diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td
index 49a6ca0..91cfaa9 100644
--- a/lib/Target/X86/X86RegisterInfo.td
+++ b/lib/Target/X86/X86RegisterInfo.td
@@ -18,6 +18,17 @@
 //
 let Namespace = "X86" in {
 
+  // Subregister indices.
+  def sub_8bit    : SubRegIndex;
+  def sub_8bit_hi : SubRegIndex;
+  def sub_16bit   : SubRegIndex;
+  def sub_32bit   : SubRegIndex;
+
+  def sub_ss  : SubRegIndex;
+  def sub_sd  : SubRegIndex;
+  def sub_xmm : SubRegIndex;
+
+
   // In the register alias definitions below, we define which registers alias
   // which others.  We only specify which registers the small registers alias,
   // because the register file generator is smart enough to figure out that
@@ -57,17 +68,22 @@ let Namespace = "X86" in {
   def BH : Register<"bh">, DwarfRegNum<[3, 3, 3]>;
 
   // 16-bit registers
+  let SubRegIndices = [sub_8bit, sub_8bit_hi] in {
   def AX : RegisterWithSubRegs<"ax", [AL,AH]>, DwarfRegNum<[0, 0, 0]>;
   def DX : RegisterWithSubRegs<"dx", [DL,DH]>, DwarfRegNum<[1, 2, 2]>;
   def CX : RegisterWithSubRegs<"cx", [CL,CH]>, DwarfRegNum<[2, 1, 1]>;
   def BX : RegisterWithSubRegs<"bx", [BL,BH]>, DwarfRegNum<[3, 3, 3]>;
+  }
+  let SubRegIndices = [sub_8bit] in {
   def SI : RegisterWithSubRegs<"si", [SIL]>, DwarfRegNum<[4, 6, 6]>;
   def DI : RegisterWithSubRegs<"di", [DIL]>, DwarfRegNum<[5, 7, 7]>;
   def BP : RegisterWithSubRegs<"bp", [BPL]>, DwarfRegNum<[6, 4, 5]>;
   def SP : RegisterWithSubRegs<"sp", [SPL]>, DwarfRegNum<[7, 5, 4]>;
+  }
   def IP : Register<"ip">, DwarfRegNum<[16]>;
   
   // X86-64 only
+  let SubRegIndices = [sub_8bit] in {
   def R8W  : RegisterWithSubRegs<"r8w", [R8B]>, DwarfRegNum<[8, -2, -2]>;
   def R9W  : RegisterWithSubRegs<"r9w", [R9B]>, DwarfRegNum<[9, -2, -2]>;
   def R10W : RegisterWithSubRegs<"r10w", [R10B]>, DwarfRegNum<[10, -2, -2]>;
@@ -76,8 +92,9 @@ let Namespace = "X86" in {
   def R13W : RegisterWithSubRegs<"r13w", [R13B]>, DwarfRegNum<[13, -2, -2]>;
   def R14W : RegisterWithSubRegs<"r14w", [R14B]>, DwarfRegNum<[14, -2, -2]>;
   def R15W : RegisterWithSubRegs<"r15w", [R15B]>, DwarfRegNum<[15, -2, -2]>;
-
+  }
   // 32-bit registers
+  let SubRegIndices = [sub_16bit] in {
   def EAX : RegisterWithSubRegs<"eax", [AX]>, DwarfRegNum<[0, 0, 0]>;
   def EDX : RegisterWithSubRegs<"edx", [DX]>, DwarfRegNum<[1, 2, 2]>;
   def ECX : RegisterWithSubRegs<"ecx", [CX]>, DwarfRegNum<[2, 1, 1]>;
@@ -97,8 +114,10 @@ let Namespace = "X86" in {
   def R13D : RegisterWithSubRegs<"r13d", [R13W]>, DwarfRegNum<[13, -2, -2]>;
   def R14D : RegisterWithSubRegs<"r14d", [R14W]>, DwarfRegNum<[14, -2, -2]>;
   def R15D : RegisterWithSubRegs<"r15d", [R15W]>, DwarfRegNum<[15, -2, -2]>;
+  }
 
   // 64-bit registers, X86-64 only
+  let SubRegIndices = [sub_32bit] in {
   def RAX : RegisterWithSubRegs<"rax", [EAX]>, DwarfRegNum<[0, -2, -2]>;
   def RDX : RegisterWithSubRegs<"rdx", [EDX]>, DwarfRegNum<[1, -2, -2]>;
   def RCX : RegisterWithSubRegs<"rcx", [ECX]>, DwarfRegNum<[2, -2, -2]>;
@@ -117,6 +136,7 @@ let Namespace = "X86" in {
   def R14 : RegisterWithSubRegs<"r14", [R14D]>, DwarfRegNum<[14, -2, -2]>;
   def R15 : RegisterWithSubRegs<"r15", [R15D]>, DwarfRegNum<[15, -2, -2]>;
   def RIP : RegisterWithSubRegs<"rip", [EIP]>,  DwarfRegNum<[16, -2, -2]>;
+  }
 
   // MMX Registers. These are actually aliased to ST0 .. ST7
   def MM0 : Register<"mm0">, DwarfRegNum<[41, 29, 29]>;
@@ -137,7 +157,9 @@ let Namespace = "X86" in {
   def FP5 : Register<"fp5">;
   def FP6 : Register<"fp6">; 
 
-  // XMM Registers, used by the various SSE instruction set extensions
+  // XMM Registers, used by the various SSE instruction set extensions.
+  // The sub_ss and sub_sd subregs are the same registers with another regclass.
+  let CompositeIndices = [(sub_ss), (sub_sd)] in {
   def XMM0: Register<"xmm0">, DwarfRegNum<[17, 21, 21]>;
   def XMM1: Register<"xmm1">, DwarfRegNum<[18, 22, 22]>;
   def XMM2: Register<"xmm2">, DwarfRegNum<[19, 23, 23]>;
@@ -156,8 +178,10 @@ let Namespace = "X86" in {
   def XMM13: Register<"xmm13">, DwarfRegNum<[30, -2, -2]>;
   def XMM14: Register<"xmm14">, DwarfRegNum<[31, -2, -2]>;
   def XMM15: Register<"xmm15">, DwarfRegNum<[32, -2, -2]>;
+  }
 
   // YMM Registers, used by AVX instructions
+  let SubRegIndices = [sub_xmm] in {
   def YMM0: RegisterWithSubRegs<"ymm0", [XMM0]>, DwarfRegNum<[17, 21, 21]>;
   def YMM1: RegisterWithSubRegs<"ymm1", [XMM1]>, DwarfRegNum<[18, 22, 22]>;
   def YMM2: RegisterWithSubRegs<"ymm2", [XMM2]>, DwarfRegNum<[19, 23, 23]>;
@@ -174,6 +198,7 @@ let Namespace = "X86" in {
   def YMM13: RegisterWithSubRegs<"ymm13", [XMM13]>, DwarfRegNum<[30, -2, -2]>;
   def YMM14: RegisterWithSubRegs<"ymm14", [XMM14]>, DwarfRegNum<[31, -2, -2]>;
   def YMM15: RegisterWithSubRegs<"ymm15", [XMM15]>, DwarfRegNum<[32, -2, -2]>;
+  }
 
   // Floating point stack registers
   def ST0 : Register<"st(0)">, DwarfRegNum<[33, 12, 11]>;
@@ -207,106 +232,19 @@ let Namespace = "X86" in {
   def DR7 : Register<"dr7">;
   
   // Condition registers
-  def ECR0 : Register<"ecr0">;
-  def ECR1 : Register<"ecr1">;
-  def ECR2 : Register<"ecr2">;
-  def ECR3 : Register<"ecr3">;
-  def ECR4 : Register<"ecr4">;
-  def ECR5 : Register<"ecr5">;
-  def ECR6 : Register<"ecr6">;
-  def ECR7 : Register<"ecr7">;
-
-  def RCR0 : Register<"rcr0">;
-  def RCR1 : Register<"rcr1">;
-  def RCR2 : Register<"rcr2">;
-  def RCR3 : Register<"rcr3">;
-  def RCR4 : Register<"rcr4">;
-  def RCR5 : Register<"rcr5">;
-  def RCR6 : Register<"rcr6">;
-  def RCR7 : Register<"rcr7">;
-  def RCR8 : Register<"rcr8">; 
+  def CR0 : Register<"cr0">;
+  def CR1 : Register<"cr1">;
+  def CR2 : Register<"cr2">;
+  def CR3 : Register<"cr3">;
+  def CR4 : Register<"cr4">;
+  def CR5 : Register<"cr5">;
+  def CR6 : Register<"cr6">;
+  def CR7 : Register<"cr7">;
+  def CR8 : Register<"cr8">;
 }
 
 
 //===----------------------------------------------------------------------===//
-// Subregister Set Definitions... now that we have all of the pieces, define the
-// sub registers for each register.
-//
-
-def x86_subreg_8bit    : PatLeaf<(i32 1)>;
-def x86_subreg_8bit_hi : PatLeaf<(i32 2)>;
-def x86_subreg_16bit   : PatLeaf<(i32 3)>;
-def x86_subreg_32bit   : PatLeaf<(i32 4)>;
-
-def x86_subreg_ss   : PatLeaf<(i32 1)>;
-def x86_subreg_sd   : PatLeaf<(i32 2)>;
-def x86_subreg_xmm  : PatLeaf<(i32 3)>;
-
-def : SubRegSet<1, [AX, CX, DX, BX, SP,  BP,  SI,  DI,  
-                    R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W],
-                   [AL, CL, DL, BL, SPL, BPL, SIL, DIL, 
-                    R8B, R9B, R10B, R11B, R12B, R13B, R14B, R15B]>;
-
-def : SubRegSet<2, [AX, CX, DX, BX],
-                   [AH, CH, DH, BH]>;
-
-def : SubRegSet<1, [EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI,  
-                    R8D, R9D, R10D, R11D, R12D, R13D, R14D, R15D],
-                   [AL, CL, DL, BL, SPL, BPL, SIL, DIL, 
-                    R8B, R9B, R10B, R11B, R12B, R13B, R14B, R15B]>;
-
-def : SubRegSet<2, [EAX, ECX, EDX, EBX],
-                   [AH, CH, DH, BH]>;
-
-def : SubRegSet<3, [EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI,
-                    R8D, R9D, R10D, R11D, R12D, R13D, R14D, R15D],
-                   [AX,  CX,  DX,  BX,  SP,  BP,  SI,  DI, 
-                    R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W]>;
-
-def : SubRegSet<1, [RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI,  
-                    R8,  R9,  R10, R11, R12, R13, R14, R15],
-                   [AL, CL, DL, BL, SPL, BPL, SIL, DIL, 
-                    R8B, R9B, R10B, R11B, R12B, R13B, R14B, R15B]>;
-
-def : SubRegSet<2, [RAX, RCX, RDX, RBX],
-                   [AH, CH, DH, BH]>;
-
-def : SubRegSet<3, [RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI,
-                    R8,  R9,  R10, R11, R12, R13, R14, R15],
-                   [AX,  CX,  DX,  BX,  SP,  BP,  SI,  DI, 
-                    R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W]>;
-
-def : SubRegSet<4, [RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI,
-                    R8,  R9,  R10, R11, R12, R13, R14, R15],
-                   [EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI, 
-                    R8D, R9D, R10D, R11D, R12D, R13D, R14D, R15D]>;
-
-def : SubRegSet<1, [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
-                    YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15],
-                   [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
-                    XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]>;
-
-def : SubRegSet<2, [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
-                    YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15],
-                   [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
-                    XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]>;
-
-def : SubRegSet<3, [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,  
-                    YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15],
-                   [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, 
-                    XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]>;
-
-def : SubRegSet<1, [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
-                    XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15],
-                   [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
-                    XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]>;
-
-def : SubRegSet<2, [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
-                    XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15],
-                   [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
-                    XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]>;
-
-//===----------------------------------------------------------------------===//
 // Register Class Definitions... now that we have all of the pieces, define the
 // top-level register classes.  The order specified in the register list is
 // implicitly defined to be the register allocation order.
@@ -370,7 +308,7 @@ def GR8 : RegisterClass<"X86", [i8],  8,
 def GR16 : RegisterClass<"X86", [i16], 16,
                          [AX, CX, DX, SI, DI, BX, BP, SP,
                           R8W, R9W, R10W, R11W, R14W, R15W, R12W, R13W]> {
-  let SubRegClassList = [GR8, GR8];
+  let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi)];
   let MethodProtos = [{
     iterator allocation_order_begin(const MachineFunction &MF) const;
     iterator allocation_order_end(const MachineFunction &MF) const;
@@ -422,7 +360,7 @@ def GR16 : RegisterClass<"X86", [i16], 16,
 def GR32 : RegisterClass<"X86", [i32], 32, 
                          [EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP,
                           R8D, R9D, R10D, R11D, R14D, R15D, R12D, R13D]> {
-  let SubRegClassList = [GR8, GR8, GR16];
+  let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi), (GR16 sub_16bit)];
   let MethodProtos = [{
     iterator allocation_order_begin(const MachineFunction &MF) const;
     iterator allocation_order_end(const MachineFunction &MF) const;
@@ -477,7 +415,9 @@ def GR32 : RegisterClass<"X86", [i32], 32,
 def GR64 : RegisterClass<"X86", [i64], 64, 
                          [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
                           RBX, R14, R15, R12, R13, RBP, RSP, RIP]> {
-  let SubRegClassList = [GR8, GR8, GR16, GR32];
+  let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi),
+                       (GR16 sub_16bit),
+                       (GR32 sub_32bit)];
   let MethodProtos = [{
     iterator allocation_order_end(const MachineFunction &MF) const;
   }];
@@ -511,14 +451,8 @@ def DEBUG_REG : RegisterClass<"X86", [i32], 32,
 }
 
 // Control registers.
-def CONTROL_REG_32 : RegisterClass<"X86", [i32], 32,
-                                   [ECR0, ECR1, ECR2, ECR3, ECR4, ECR5, ECR6,
-                                    ECR7]> {
-}
-
-def CONTROL_REG_64 : RegisterClass<"X86", [i64], 64,
-                                   [RCR0, RCR1, RCR2, RCR3, RCR4, RCR5, RCR6,
-                                    RCR7, RCR8]> {
+def CONTROL_REG : RegisterClass<"X86", [i64], 64,
+                                [CR0, CR1, CR2, CR3, CR4, CR5, CR6, CR7, CR8]> {
 }
 
 // GR8_ABCD_L, GR8_ABCD_H, GR16_ABCD, GR32_ABCD, GR64_ABCD - Subclasses of
@@ -532,20 +466,27 @@ def GR8_ABCD_L : RegisterClass<"X86", [i8], 8, [AL, CL, DL, BL]> {
 def GR8_ABCD_H : RegisterClass<"X86", [i8], 8, [AH, CH, DH, BH]> {
 }
 def GR16_ABCD : RegisterClass<"X86", [i16], 16, [AX, CX, DX, BX]> {
-  let SubRegClassList = [GR8_ABCD_L, GR8_ABCD_H];
+  let SubRegClasses = [(GR8_ABCD_L sub_8bit), (GR8_ABCD_H sub_8bit_hi)];
 }
 def GR32_ABCD : RegisterClass<"X86", [i32], 32, [EAX, ECX, EDX, EBX]> {
-  let SubRegClassList = [GR8_ABCD_L, GR8_ABCD_H, GR16_ABCD];
+  let SubRegClasses = [(GR8_ABCD_L sub_8bit),
+                       (GR8_ABCD_H sub_8bit_hi),
+                       (GR16_ABCD sub_16bit)];
 }
 def GR64_ABCD : RegisterClass<"X86", [i64], 64, [RAX, RCX, RDX, RBX]> {
-  let SubRegClassList = [GR8_ABCD_L, GR8_ABCD_H, GR16_ABCD, GR32_ABCD];
+  let SubRegClasses = [(GR8_ABCD_L sub_8bit),
+                       (GR8_ABCD_H sub_8bit_hi),
+                       (GR16_ABCD sub_16bit),
+                       (GR32_ABCD sub_32bit)];
 }
 def GR32_TC   : RegisterClass<"X86", [i32], 32, [EAX, ECX, EDX]> {
-  let SubRegClassList = [GR8, GR8, GR16];
+  let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi), (GR16 sub_16bit)];
 }
 def GR64_TC   : RegisterClass<"X86", [i64], 64, [RAX, RCX, RDX, RSI, RDI,
                                                  R8, R9, R11]> {
-  let SubRegClassList = [GR8, GR8, GR16, GR32_TC];
+  let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi),
+                       (GR16 sub_16bit),
+                       (GR32_TC sub_32bit)];
 }
 
 // GR8_NOREX - GR8 registers which do not require a REX prefix.
@@ -585,7 +526,7 @@ def GR8_NOREX : RegisterClass<"X86", [i8], 8,
 // GR16_NOREX - GR16 registers which do not require a REX prefix.
 def GR16_NOREX : RegisterClass<"X86", [i16], 16,
                                [AX, CX, DX, SI, DI, BX, BP, SP]> {
-  let SubRegClassList = [GR8_NOREX, GR8_NOREX];
+  let SubRegClasses = [(GR8_NOREX sub_8bit, sub_8bit_hi)];
   let MethodProtos = [{
     iterator allocation_order_end(const MachineFunction &MF) const;
   }];
@@ -608,7 +549,8 @@ def GR16_NOREX : RegisterClass<"X86", [i16], 16,
 // GR32_NOREX - GR32 registers which do not require a REX prefix.
 def GR32_NOREX : RegisterClass<"X86", [i32], 32,
                                [EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP]> {
-  let SubRegClassList = [GR8_NOREX, GR8_NOREX, GR16_NOREX];
+  let SubRegClasses = [(GR8_NOREX sub_8bit, sub_8bit_hi),
+                       (GR16_NOREX sub_16bit)];
   let MethodProtos = [{
     iterator allocation_order_end(const MachineFunction &MF) const;
   }];
@@ -631,7 +573,9 @@ def GR32_NOREX : RegisterClass<"X86", [i32], 32,
 // GR64_NOREX - GR64 registers which do not require a REX prefix.
 def GR64_NOREX : RegisterClass<"X86", [i64], 64,
                                [RAX, RCX, RDX, RSI, RDI, RBX, RBP, RSP, RIP]> {
-  let SubRegClassList = [GR8_NOREX, GR8_NOREX, GR16_NOREX, GR32_NOREX];
+  let SubRegClasses = [(GR8_NOREX sub_8bit, sub_8bit_hi),
+                       (GR16_NOREX sub_16bit),
+                       (GR32_NOREX sub_32bit)];
   let MethodProtos = [{
     iterator allocation_order_end(const MachineFunction &MF) const;
   }];
@@ -656,7 +600,7 @@ def GR64_NOREX : RegisterClass<"X86", [i64], 64,
 def GR32_NOSP : RegisterClass<"X86", [i32], 32,
                               [EAX, ECX, EDX, ESI, EDI, EBX, EBP,
                                R8D, R9D, R10D, R11D, R14D, R15D, R12D, R13D]> {
-  let SubRegClassList = [GR8, GR8, GR16];
+  let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi), (GR16 sub_16bit)];
   let MethodProtos = [{
     iterator allocation_order_begin(const MachineFunction &MF) const;
     iterator allocation_order_end(const MachineFunction &MF) const;
@@ -709,7 +653,9 @@ def GR32_NOSP : RegisterClass<"X86", [i32], 32,
 def GR64_NOSP : RegisterClass<"X86", [i64], 64,
                               [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
                                RBX, R14, R15, R12, R13, RBP]> {
-  let SubRegClassList = [GR8, GR8, GR16, GR32_NOSP];
+  let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi),
+                       (GR16 sub_16bit),
+                       (GR32_NOSP sub_32bit)];
   let MethodProtos = [{
     iterator allocation_order_end(const MachineFunction &MF) const;
   }];
@@ -734,7 +680,9 @@ def GR64_NOSP : RegisterClass<"X86", [i64], 64,
 // GR64_NOREX_NOSP - GR64_NOREX registers except RSP.
 def GR64_NOREX_NOSP : RegisterClass<"X86", [i64], 64,
                                     [RAX, RCX, RDX, RSI, RDI, RBX, RBP]> {
-  let SubRegClassList = [GR8_NOREX, GR8_NOREX, GR16_NOREX, GR32_NOREX];
+  let SubRegClasses = [(GR8_NOREX sub_8bit, sub_8bit_hi),
+                       (GR16_NOREX sub_16bit),
+                       (GR32_NOREX sub_32bit)];
   let MethodProtos = [{
     iterator allocation_order_end(const MachineFunction &MF) const;
   }];
@@ -758,7 +706,9 @@ def GR64_NOREX_NOSP : RegisterClass<"X86", [i64], 64,
 
 // A class to support the 'A' assembler constraint: EAX then EDX.
 def GR32_AD : RegisterClass<"X86", [i32], 32, [EAX, EDX]> {
-  let SubRegClassList = [GR8_ABCD_L, GR8_ABCD_H, GR16_ABCD];
+  let SubRegClasses = [(GR8_ABCD_L sub_8bit),
+                       (GR8_ABCD_H sub_8bit_hi),
+                       (GR16_ABCD sub_16bit)];
 }
 
 // Scalar SSE2 floating point registers.
@@ -836,7 +786,8 @@ def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],128,
                           [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
                            XMM8, XMM9, XMM10, XMM11,
                            XMM12, XMM13, XMM14, XMM15]> {
-  let SubRegClassList = [FR32, FR64];
+  let SubRegClasses = [(FR32 sub_ss), (FR64 sub_sd)];
+  
   let MethodProtos = [{
     iterator allocation_order_end(const MachineFunction &MF) const;
   }];
@@ -856,7 +807,7 @@ def VR256 : RegisterClass<"X86", [ v8i32, v4i64, v8f32, v4f64],256,
                           [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
                            YMM8, YMM9, YMM10, YMM11,
                            YMM12, YMM13, YMM14, YMM15]> {
-  let SubRegClassList = [FR32, FR64, VR128];
+  let SubRegClasses = [(FR32 sub_ss), (FR64 sub_sd), (VR128 sub_xmm)];
 }
 
 // Status flags registers.
diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp
index cd87b82..6297a27 100644
--- a/lib/Target/X86/X86SelectionDAGInfo.cpp
+++ b/lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -12,11 +12,232 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "x86-selectiondag-info"
-#include "X86SelectionDAGInfo.h"
+#include "X86TargetMachine.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/CodeGen/SelectionDAG.h"
 using namespace llvm;
 
-X86SelectionDAGInfo::X86SelectionDAGInfo() {
+X86SelectionDAGInfo::X86SelectionDAGInfo(const X86TargetMachine &TM) :
+  TargetSelectionDAGInfo(TM),
+  Subtarget(&TM.getSubtarget<X86Subtarget>()),
+  TLI(*TM.getTargetLowering()) {
 }
 
 X86SelectionDAGInfo::~X86SelectionDAGInfo() {
 }
+
+SDValue
+X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
+                                             SDValue Chain,
+                                             SDValue Dst, SDValue Src,
+                                             SDValue Size, unsigned Align,
+                                             bool isVolatile,
+                                             const Value *DstSV,
+                                             uint64_t DstSVOff) const {
+  ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+
+  // If not DWORD aligned or size is more than the threshold, call the library.
+  // The libc version is likely to be faster for these cases. It can use the
+  // address value and run time information about the CPU.
+  if ((Align & 3) != 0 ||
+      !ConstantSize ||
+      ConstantSize->getZExtValue() >
+        Subtarget->getMaxInlineSizeThreshold()) {
+    SDValue InFlag(0, 0);
+
+    // Check to see if there is a specialized entry-point for memory zeroing.
+    ConstantSDNode *V = dyn_cast<ConstantSDNode>(Src);
+
+    if (const char *bzeroEntry =  V &&
+        V->isNullValue() ? Subtarget->getBZeroEntry() : 0) {
+      EVT IntPtr = TLI.getPointerTy();
+      const Type *IntPtrTy = getTargetData()->getIntPtrType(*DAG.getContext());
+      TargetLowering::ArgListTy Args;
+      TargetLowering::ArgListEntry Entry;
+      Entry.Node = Dst;
+      Entry.Ty = IntPtrTy;
+      Args.push_back(Entry);
+      Entry.Node = Size;
+      Args.push_back(Entry);
+      std::pair<SDValue,SDValue> CallResult =
+        TLI.LowerCallTo(Chain, Type::getVoidTy(*DAG.getContext()),
+                        false, false, false, false,
+                        0, CallingConv::C, false, /*isReturnValueUsed=*/false,
+                        DAG.getExternalSymbol(bzeroEntry, IntPtr), Args,
+                        DAG, dl);
+      return CallResult.second;
+    }
+
+    // Otherwise have the target-independent code call memset.
+    return SDValue();
+  }
+
+  uint64_t SizeVal = ConstantSize->getZExtValue();
+  SDValue InFlag(0, 0);
+  EVT AVT;
+  SDValue Count;
+  ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Src);
+  unsigned BytesLeft = 0;
+  bool TwoRepStos = false;
+  if (ValC) {
+    unsigned ValReg;
+    uint64_t Val = ValC->getZExtValue() & 255;
+
+    // If the value is a constant, then we can potentially use larger sets.
+    switch (Align & 3) {
+    case 2:   // WORD aligned
+      AVT = MVT::i16;
+      ValReg = X86::AX;
+      Val = (Val << 8) | Val;
+      break;
+    case 0:  // DWORD aligned
+      AVT = MVT::i32;
+      ValReg = X86::EAX;
+      Val = (Val << 8)  | Val;
+      Val = (Val << 16) | Val;
+      if (Subtarget->is64Bit() && ((Align & 0x7) == 0)) {  // QWORD aligned
+        AVT = MVT::i64;
+        ValReg = X86::RAX;
+        Val = (Val << 32) | Val;
+      }
+      break;
+    default:  // Byte aligned
+      AVT = MVT::i8;
+      ValReg = X86::AL;
+      Count = DAG.getIntPtrConstant(SizeVal);
+      break;
+    }
+
+    if (AVT.bitsGT(MVT::i8)) {
+      unsigned UBytes = AVT.getSizeInBits() / 8;
+      Count = DAG.getIntPtrConstant(SizeVal / UBytes);
+      BytesLeft = SizeVal % UBytes;
+    }
+
+    Chain  = DAG.getCopyToReg(Chain, dl, ValReg, DAG.getConstant(Val, AVT),
+                              InFlag);
+    InFlag = Chain.getValue(1);
+  } else {
+    AVT = MVT::i8;
+    Count  = DAG.getIntPtrConstant(SizeVal);
+    Chain  = DAG.getCopyToReg(Chain, dl, X86::AL, Src, InFlag);
+    InFlag = Chain.getValue(1);
+  }
+
+  Chain  = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RCX :
+                                                              X86::ECX,
+                            Count, InFlag);
+  InFlag = Chain.getValue(1);
+  Chain  = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RDI :
+                                                              X86::EDI,
+                            Dst, InFlag);
+  InFlag = Chain.getValue(1);
+
+  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
+  SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag };
+  Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops, array_lengthof(Ops));
+
+  if (TwoRepStos) {
+    InFlag = Chain.getValue(1);
+    Count  = Size;
+    EVT CVT = Count.getValueType();
+    SDValue Left = DAG.getNode(ISD::AND, dl, CVT, Count,
+                               DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT));
+    Chain  = DAG.getCopyToReg(Chain, dl, (CVT == MVT::i64) ? X86::RCX :
+                                                             X86::ECX,
+                              Left, InFlag);
+    InFlag = Chain.getValue(1);
+    Tys = DAG.getVTList(MVT::Other, MVT::Flag);
+    SDValue Ops[] = { Chain, DAG.getValueType(MVT::i8), InFlag };
+    Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops, array_lengthof(Ops));
+  } else if (BytesLeft) {
+    // Handle the last 1 - 7 bytes.
+    unsigned Offset = SizeVal - BytesLeft;
+    EVT AddrVT = Dst.getValueType();
+    EVT SizeVT = Size.getValueType();
+
+    Chain = DAG.getMemset(Chain, dl,
+                          DAG.getNode(ISD::ADD, dl, AddrVT, Dst,
+                                      DAG.getConstant(Offset, AddrVT)),
+                          Src,
+                          DAG.getConstant(BytesLeft, SizeVT),
+                          Align, isVolatile, DstSV, DstSVOff + Offset);
+  }
+
+  // TODO: Use a Tokenfactor, as in memcpy, instead of a single chain.
+  return Chain;
+}
+
+SDValue
+X86SelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
+                                        SDValue Chain, SDValue Dst, SDValue Src,
+                                        SDValue Size, unsigned Align,
+                                        bool isVolatile, bool AlwaysInline,
+                                        const Value *DstSV,
+                                        uint64_t DstSVOff,
+                                        const Value *SrcSV,
+                                        uint64_t SrcSVOff) const {
+  // This requires the copy size to be a constant, preferrably
+  // within a subtarget-specific limit.
+  ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+  if (!ConstantSize)
+    return SDValue();
+  uint64_t SizeVal = ConstantSize->getZExtValue();
+  if (!AlwaysInline && SizeVal > Subtarget->getMaxInlineSizeThreshold())
+    return SDValue();
+
+  /// If not DWORD aligned, call the library.
+  if ((Align & 3) != 0)
+    return SDValue();
+
+  // DWORD aligned
+  EVT AVT = MVT::i32;
+  if (Subtarget->is64Bit() && ((Align & 0x7) == 0))  // QWORD aligned
+    AVT = MVT::i64;
+
+  unsigned UBytes = AVT.getSizeInBits() / 8;
+  unsigned CountVal = SizeVal / UBytes;
+  SDValue Count = DAG.getIntPtrConstant(CountVal);
+  unsigned BytesLeft = SizeVal % UBytes;
+
+  SDValue InFlag(0, 0);
+  Chain  = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RCX :
+                                                              X86::ECX,
+                            Count, InFlag);
+  InFlag = Chain.getValue(1);
+  Chain  = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RDI :
+                                                              X86::EDI,
+                            Dst, InFlag);
+  InFlag = Chain.getValue(1);
+  Chain  = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RSI :
+                                                              X86::ESI,
+                            Src, InFlag);
+  InFlag = Chain.getValue(1);
+
+  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
+  SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag };
+  SDValue RepMovs = DAG.getNode(X86ISD::REP_MOVS, dl, Tys, Ops,
+                                array_lengthof(Ops));
+
+  SmallVector<SDValue, 4> Results;
+  Results.push_back(RepMovs);
+  if (BytesLeft) {
+    // Handle the last 1 - 7 bytes.
+    unsigned Offset = SizeVal - BytesLeft;
+    EVT DstVT = Dst.getValueType();
+    EVT SrcVT = Src.getValueType();
+    EVT SizeVT = Size.getValueType();
+    Results.push_back(DAG.getMemcpy(Chain, dl,
+                                    DAG.getNode(ISD::ADD, dl, DstVT, Dst,
+                                                DAG.getConstant(Offset, DstVT)),
+                                    DAG.getNode(ISD::ADD, dl, SrcVT, Src,
+                                                DAG.getConstant(Offset, SrcVT)),
+                                    DAG.getConstant(BytesLeft, SizeVT),
+                                    Align, isVolatile, AlwaysInline,
+                                    DstSV, DstSVOff + Offset,
+                                    SrcSV, SrcSVOff + Offset));
+  }
+
+  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+                     &Results[0], Results.size());
+}
diff --git a/lib/Target/X86/X86SelectionDAGInfo.h b/lib/Target/X86/X86SelectionDAGInfo.h
index 9834754..4f30f31 100644
--- a/lib/Target/X86/X86SelectionDAGInfo.h
+++ b/lib/Target/X86/X86SelectionDAGInfo.h
@@ -18,10 +18,40 @@
 
 namespace llvm {
 
+class X86TargetLowering;
+class X86TargetMachine;
+class X86Subtarget;
+
 class X86SelectionDAGInfo : public TargetSelectionDAGInfo {
+  /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
+  /// make the right decision when generating code for different targets.
+  const X86Subtarget *Subtarget;
+
+  const X86TargetLowering &TLI;
+
 public:
-  X86SelectionDAGInfo();
+  explicit X86SelectionDAGInfo(const X86TargetMachine &TM);
   ~X86SelectionDAGInfo();
+
+  virtual
+  SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
+                                  SDValue Chain,
+                                  SDValue Dst, SDValue Src,
+                                  SDValue Size, unsigned Align,
+                                  bool isVolatile,
+                                  const Value *DstSV,
+                                  uint64_t DstSVOff) const;
+
+  virtual
+  SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
+                                  SDValue Chain,
+                                  SDValue Dst, SDValue Src,
+                                  SDValue Size, unsigned Align,
+                                  bool isVolatile, bool AlwaysInline,
+                                  const Value *DstSV,
+                                  uint64_t DstSVOff,
+                                  const Value *SrcSV,
+                                  uint64_t SrcSVOff) const;
 };
 
 }
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index f39904e..f2c5058 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -17,17 +17,13 @@
 #include "llvm/PassManager.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCStreamer.h"
 #include "llvm/Support/FormattedStream.h"
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/Target/TargetRegistry.h"
-#include "llvm/Support/CommandLine.h"
-
 using namespace llvm;
 
-static cl::opt<bool> DisableSSEDomain("disable-sse-domain",
-    cl::init(false), cl::Hidden,
-    cl::desc("Disable SSE Domain Fixing"));
-
 static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
   Triple TheTriple(TT);
   switch (TheTriple.getOS()) {
@@ -43,6 +39,18 @@ static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
   }
 }
 
+static MCStreamer *createMCStreamer(const Target &T, const std::string &TT,
+                                    MCContext &Ctx, TargetAsmBackend &TAB,
+                                    raw_ostream &_OS,
+                                    MCCodeEmitter *_Emitter,
+                                    bool RelaxAll) {
+  Triple TheTriple(TT);
+  switch (TheTriple.getOS()) {
+  default:
+    return createMachOStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll);
+  }
+}
+
 extern "C" void LLVMInitializeX86Target() { 
   // Register the target.
   RegisterTargetMachine<X86_32TargetMachine> X(TheX86_32Target);
@@ -63,6 +71,12 @@ extern "C" void LLVMInitializeX86Target() {
                                      createX86_32AsmBackend);
   TargetRegistry::RegisterAsmBackend(TheX86_64Target,
                                      createX86_64AsmBackend);
+
+  // Register the object streamer.
+  TargetRegistry::RegisterObjectStreamer(TheX86_32Target,
+                                         createMCStreamer);
+  TargetRegistry::RegisterObjectStreamer(TheX86_64Target,
+                                         createMCStreamer);
 }
 
 
@@ -88,7 +102,8 @@ X86TargetMachine::X86TargetMachine(const Target &T, const std::string &TT,
               Subtarget.getStackAlignment(),
               (Subtarget.isTargetWin64() ? -40 :
                (Subtarget.is64Bit() ? -8 : -4))),
-    InstrInfo(*this), JITInfo(*this), TLInfo(*this), ELFWriterInfo(*this) {
+    InstrInfo(*this), JITInfo(*this), TLInfo(*this), TSInfo(*this),
+    ELFWriterInfo(*this) {
   DefRelocModel = getRelocationModel();
       
   // If no relocation model was picked, default as appropriate for the target.
@@ -178,8 +193,7 @@ bool X86TargetMachine::addPostRegAlloc(PassManagerBase &PM,
 
 bool X86TargetMachine::addPreEmitPass(PassManagerBase &PM,
                                       CodeGenOpt::Level OptLevel) {
-  if (OptLevel != CodeGenOpt::None && Subtarget.hasSSE2() &&
-      !DisableSSEDomain) {
+  if (OptLevel != CodeGenOpt::None && Subtarget.hasSSE2()) {
     PM.add(createSSEDomainFixPass());
     return true;
   }
diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h
index dc4234c..f9fb424 100644
--- a/lib/Target/X86/X86TargetMachine.h
+++ b/lib/Target/X86/X86TargetMachine.h
@@ -23,6 +23,7 @@
 #include "X86JITInfo.h"
 #include "X86Subtarget.h"
 #include "X86ISelLowering.h"
+#include "X86SelectionDAGInfo.h"
 
 namespace llvm {
   
@@ -35,6 +36,7 @@ class X86TargetMachine : public LLVMTargetMachine {
   X86InstrInfo      InstrInfo;
   X86JITInfo        JITInfo;
   X86TargetLowering TLInfo;
+  X86SelectionDAGInfo TSInfo;
   X86ELFWriterInfo  ELFWriterInfo;
   Reloc::Model      DefRelocModel; // Reloc model before it's overridden.
 
@@ -54,6 +56,9 @@ public:
   virtual const X86TargetLowering *getTargetLowering() const { 
     return &TLInfo;
   }
+  virtual const X86SelectionDAGInfo *getSelectionDAGInfo() const { 
+    return &TSInfo;
+  }
   virtual const X86RegisterInfo  *getRegisterInfo() const {
     return &InstrInfo.getRegisterInfo();
   }
diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp
index 3990b8b..b230572 100644
--- a/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/lib/Target/XCore/XCoreISelLowering.cpp
@@ -80,7 +80,7 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM)
   setShiftAmountType(MVT::i32);
   setStackPointerRegisterToSaveRestore(XCore::SP);
 
-  setSchedulingPreference(SchedulingForRegPressure);
+  setSchedulingPreference(Sched::RegPressure);
 
   // Use i32 for setcc operations results (slt, sgt, ...).
   setBooleanContents(ZeroOrOneBooleanContent);
diff --git a/lib/Target/XCore/XCoreInstrInfo.cpp b/lib/Target/XCore/XCoreInstrInfo.cpp
index c983112..5260258 100644
--- a/lib/Target/XCore/XCoreInstrInfo.cpp
+++ b/lib/Target/XCore/XCoreInstrInfo.cpp
@@ -361,9 +361,8 @@ bool XCoreInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator I,
                                   unsigned DestReg, unsigned SrcReg,
                                   const TargetRegisterClass *DestRC,
-                                  const TargetRegisterClass *SrcRC) const {
-  DebugLoc DL;
-  if (I != MBB.end()) DL = I->getDebugLoc();
+                                  const TargetRegisterClass *SrcRC,
+                                  DebugLoc DL) const {
 
   if (DestRC == SrcRC) {
     if (DestRC == XCore::GRRegsRegisterClass) {
@@ -395,7 +394,8 @@ void XCoreInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator I,
                                          unsigned SrcReg, bool isKill,
                                          int FrameIndex,
-                                         const TargetRegisterClass *RC) const
+                                         const TargetRegisterClass *RC,
+                                         const TargetRegisterInfo *TRI) const
 {
   DebugLoc DL;
   if (I != MBB.end()) DL = I->getDebugLoc();
@@ -408,7 +408,8 @@ void XCoreInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
 void XCoreInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
                                           MachineBasicBlock::iterator I,
                                           unsigned DestReg, int FrameIndex,
-                                          const TargetRegisterClass *RC) const
+                                          const TargetRegisterClass *RC,
+                                          const TargetRegisterInfo *TRI) const
 {
   DebugLoc DL;
   if (I != MBB.end()) DL = I->getDebugLoc();
@@ -419,7 +420,8 @@ void XCoreInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
 
 bool XCoreInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
                                                MachineBasicBlock::iterator MI,
-                                const std::vector<CalleeSavedInfo> &CSI) const {
+                                        const std::vector<CalleeSavedInfo> &CSI,
+                                          const TargetRegisterInfo *TRI) const {
   if (CSI.empty()) {
     return true;
   }
@@ -437,7 +439,7 @@ bool XCoreInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
     MBB.addLiveIn(it->getReg());
 
     storeRegToStackSlot(MBB, MI, it->getReg(), true,
-                        it->getFrameIdx(), it->getRegClass());
+                        it->getFrameIdx(), it->getRegClass(), &RI);
     if (emitFrameMoves) {
       MCSymbol *SaveLabel = MF->getContext().CreateTempSymbol();
       BuildMI(MBB, MI, DL, get(XCore::DBG_LABEL)).addSym(SaveLabel);
@@ -449,7 +451,8 @@ bool XCoreInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
 
 bool XCoreInstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator MI,
-                               const std::vector<CalleeSavedInfo> &CSI) const
+                                        const std::vector<CalleeSavedInfo> &CSI,
+                                            const TargetRegisterInfo *TRI) const
 {
   bool AtStart = MI == MBB.begin();
   MachineBasicBlock::iterator BeforeI = MI;
@@ -460,7 +463,7 @@ bool XCoreInstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
     
     loadRegFromStackSlot(MBB, MI, it->getReg(),
                                   it->getFrameIdx(),
-                                  it->getRegClass());
+                         it->getRegClass(), &RI);
     assert(MI != MBB.begin() &&
            "loadRegFromStackSlot didn't insert any code!");
     // Insert in reverse order.  loadRegFromStackSlot can insert multiple
diff --git a/lib/Target/XCore/XCoreInstrInfo.h b/lib/Target/XCore/XCoreInstrInfo.h
index 3e0a765..9035ea9 100644
--- a/lib/Target/XCore/XCoreInstrInfo.h
+++ b/lib/Target/XCore/XCoreInstrInfo.h
@@ -67,25 +67,30 @@ public:
                             MachineBasicBlock::iterator I,
                             unsigned DestReg, unsigned SrcReg,
                             const TargetRegisterClass *DestRC,
-                            const TargetRegisterClass *SrcRC) const;
+                            const TargetRegisterClass *SrcRC,
+                            DebugLoc DL) const;
 
   virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MI,
                                    unsigned SrcReg, bool isKill, int FrameIndex,
-                                   const TargetRegisterClass *RC) const;
+                                   const TargetRegisterClass *RC,
+                                   const TargetRegisterInfo *TRI) const;
 
   virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MI,
                                     unsigned DestReg, int FrameIndex,
-                                    const TargetRegisterClass *RC) const;
+                                    const TargetRegisterClass *RC,
+                                    const TargetRegisterInfo *TRI) const;
 
   virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
                                         MachineBasicBlock::iterator MI,
-                                const std::vector<CalleeSavedInfo> &CSI) const;
+                                        const std::vector<CalleeSavedInfo> &CSI,
+                                         const TargetRegisterInfo *TRI) const;
   
   virtual bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator MI,
-                               const std::vector<CalleeSavedInfo> &CSI) const;
+                                        const std::vector<CalleeSavedInfo> &CSI,
+                                           const TargetRegisterInfo *TRI) const;
 
   virtual bool ReverseBranchCondition(
                             SmallVectorImpl<MachineOperand> &Cond) const;
diff --git a/lib/Target/XCore/XCoreSelectionDAGInfo.cpp b/lib/Target/XCore/XCoreSelectionDAGInfo.cpp
index 6aac237..44aeb60 100644
--- a/lib/Target/XCore/XCoreSelectionDAGInfo.cpp
+++ b/lib/Target/XCore/XCoreSelectionDAGInfo.cpp
@@ -12,10 +12,11 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "xcore-selectiondag-info"
-#include "XCoreSelectionDAGInfo.h"
+#include "XCoreTargetMachine.h"
 using namespace llvm;
 
-XCoreSelectionDAGInfo::XCoreSelectionDAGInfo() {
+XCoreSelectionDAGInfo::XCoreSelectionDAGInfo(const XCoreTargetMachine &TM)
+  : TargetSelectionDAGInfo(TM) {
 }
 
 XCoreSelectionDAGInfo::~XCoreSelectionDAGInfo() {
diff --git a/lib/Target/XCore/XCoreSelectionDAGInfo.h b/lib/Target/XCore/XCoreSelectionDAGInfo.h
index fd96716..0386968 100644
--- a/lib/Target/XCore/XCoreSelectionDAGInfo.h
+++ b/lib/Target/XCore/XCoreSelectionDAGInfo.h
@@ -18,9 +18,11 @@
 
 namespace llvm {
 
+class XCoreTargetMachine;
+
 class XCoreSelectionDAGInfo : public TargetSelectionDAGInfo {
 public:
-  XCoreSelectionDAGInfo();
+  explicit XCoreSelectionDAGInfo(const XCoreTargetMachine &TM);
   ~XCoreSelectionDAGInfo();
 };
 
diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp
index 267f46a..b0013eb 100644
--- a/lib/Target/XCore/XCoreTargetMachine.cpp
+++ b/lib/Target/XCore/XCoreTargetMachine.cpp
@@ -28,7 +28,8 @@ XCoreTargetMachine::XCoreTargetMachine(const Target &T, const std::string &TT,
                "i16:16:32-i32:32:32-i64:32:32-n32"),
     InstrInfo(),
     FrameInfo(*this),
-    TLInfo(*this) {
+    TLInfo(*this),
+    TSInfo(*this) {
 }
 
 bool XCoreTargetMachine::addInstSelector(PassManagerBase &PM,
diff --git a/lib/Target/XCore/XCoreTargetMachine.h b/lib/Target/XCore/XCoreTargetMachine.h
index 701a6f1..14073ba 100644
--- a/lib/Target/XCore/XCoreTargetMachine.h
+++ b/lib/Target/XCore/XCoreTargetMachine.h
@@ -20,6 +20,7 @@
 #include "XCoreSubtarget.h"
 #include "XCoreInstrInfo.h"
 #include "XCoreISelLowering.h"
+#include "XCoreSelectionDAGInfo.h"
 
 namespace llvm {
 
@@ -29,6 +30,7 @@ class XCoreTargetMachine : public LLVMTargetMachine {
   XCoreInstrInfo InstrInfo;
   XCoreFrameInfo FrameInfo;
   XCoreTargetLowering TLInfo;
+  XCoreSelectionDAGInfo TSInfo;
 public:
   XCoreTargetMachine(const Target &T, const std::string &TT,
                      const std::string &FS);
@@ -40,6 +42,10 @@ public:
     return &TLInfo;
   }
 
+  virtual const XCoreSelectionDAGInfo* getSelectionDAGInfo() const {
+    return &TSInfo;
+  }
+
   virtual const TargetRegisterInfo *getRegisterInfo() const {
     return &InstrInfo.getRegisterInfo();
   }
diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp
index 6443dd4..692e47d 100644
--- a/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -535,14 +535,14 @@ void DAE::MarkValue(const RetOrArg &RA, Liveness L,
 /// values (according to Uses) live as well.
 void DAE::MarkLive(const Function &F) {
   DEBUG(dbgs() << "DAE - Intrinsically live fn: " << F.getName() << "\n");
-    // Mark the function as live.
-    LiveFunctions.insert(&F);
-    // Mark all arguments as live.
-    for (unsigned i = 0, e = F.arg_size(); i != e; ++i)
-      PropagateLiveness(CreateArg(&F, i));
-    // Mark all return values as live.
-    for (unsigned i = 0, e = NumRetVals(&F); i != e; ++i)
-      PropagateLiveness(CreateRet(&F, i));
+  // Mark the function as live.
+  LiveFunctions.insert(&F);
+  // Mark all arguments as live.
+  for (unsigned i = 0, e = F.arg_size(); i != e; ++i)
+    PropagateLiveness(CreateArg(&F, i));
+  // Mark all return values as live.
+  for (unsigned i = 0, e = NumRetVals(&F); i != e; ++i)
+    PropagateLiveness(CreateRet(&F, i));
 }
 
 /// MarkLive - Mark the given return value or argument as live. Additionally,
@@ -859,7 +859,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
       if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
         Value *RetVal;
 
-        if (NFTy->getReturnType() == Type::getVoidTy(F->getContext())) {
+        if (NFTy->getReturnType()->isVoidTy()) {
           RetVal = 0;
         } else {
           assert (RetTy->isStructTy());
diff --git a/lib/Transforms/IPO/InlineAlways.cpp b/lib/Transforms/IPO/InlineAlways.cpp
index bc8028c..8e312e7 100644
--- a/lib/Transforms/IPO/InlineAlways.cpp
+++ b/lib/Transforms/IPO/InlineAlways.cpp
@@ -54,6 +54,9 @@ namespace {
       return removeDeadFunctions(CG, &NeverInline); 
     }
     virtual bool doInitialization(CallGraph &CG);
+    void releaseMemory() {
+      CA.clear();
+    }
   };
 }
 
diff --git a/lib/Transforms/IPO/InlineSimple.cpp b/lib/Transforms/IPO/InlineSimple.cpp
index 46cf4b2..74b4a1c 100644
--- a/lib/Transforms/IPO/InlineSimple.cpp
+++ b/lib/Transforms/IPO/InlineSimple.cpp
@@ -49,6 +49,9 @@ namespace {
       CA.growCachedCostInfo(Caller, Callee);
     }
     virtual bool doInitialization(CallGraph &CG);
+    void releaseMemory() {
+      CA.clear();
+    }
   };
 }
 
diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp
index b07e22c..622a9b5 100644
--- a/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/lib/Transforms/IPO/MergeFunctions.cpp
@@ -17,32 +17,55 @@
 // important that the hash function be high quality. The equality comparison
 // iterates through each instruction in each basic block.
 //
-// When a match is found, the functions are folded. We can only fold two
-// functions when we know that the definition of one of them is not
-// overridable.
+// When a match is found the functions are folded. If both functions are
+// overridable, we move the functionality into a new internal function and
+// leave two overridable thunks to it.
 //
 //===----------------------------------------------------------------------===//
 //
 // Future work:
 //
-// * fold vector<T*>::push_back and vector<S*>::push_back.
-//
-// These two functions have different types, but in a way that doesn't matter
-// to us. As long as we never see an S or T itself, using S* and S** is the
-// same as using a T* and T**.
-//
 // * virtual functions.
 //
 // Many functions have their address taken by the virtual function table for
 // the object they belong to. However, as long as it's only used for a lookup
 // and call, this is irrelevant, and we'd like to fold such implementations.
 //
+// * use SCC to cut down on pair-wise comparisons and solve larger cycles.
+//
+// The current implementation loops over a pair-wise comparison of all
+// functions in the program where the two functions in the pair are treated as
+// assumed to be equal until proven otherwise. We could both use fewer
+// comparisons and optimize more complex cases if we used strongly connected
+// components of the call graph.
+//
+// * be smarter about bitcast.
+//
+// In order to fold functions, we will sometimes add either bitcast instructions
+// or bitcast constant expressions. Unfortunately, this can confound further
+// analysis since the two functions differ where one has a bitcast and the
+// other doesn't. We should learn to peer through bitcasts without imposing bad
+// performance properties.
+//
+// * don't emit aliases for Mach-O.
+//
+// Mach-O doesn't support aliases which means that we must avoid introducing
+// them in the bitcode on architectures which don't support them, such as
+// Mac OSX. There's a few approaches to this problem;
+//   a) teach codegen to lower global aliases to thunks on platforms which don't
+//      support them.
+//   b) always emit thunks, and create a separate thunk-to-alias pass which
+//      runs on ELF systems. This has the added benefit of transforming other
+//      thunks such as those produced by a C++ frontend into aliases when legal
+//      to do so.
+//
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "mergefunc"
 #include "llvm/Transforms/IPO.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Constants.h"
 #include "llvm/InlineAsm.h"
@@ -54,6 +77,7 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetData.h"
 #include <map>
 #include <vector>
 using namespace llvm;
@@ -61,17 +85,33 @@ using namespace llvm;
 STATISTIC(NumFunctionsMerged, "Number of functions merged");
 
 namespace {
-  struct MergeFunctions : public ModulePass {
+  class MergeFunctions : public ModulePass {
+  public:
     static char ID; // Pass identification, replacement for typeid
     MergeFunctions() : ModulePass(&ID) {}
 
     bool runOnModule(Module &M);
+
+  private:
+    bool isEquivalentGEP(const GetElementPtrInst *GEP1,
+                         const GetElementPtrInst *GEP2);
+
+    bool equals(const BasicBlock *BB1, const BasicBlock *BB2);
+    bool equals(const Function *F, const Function *G);
+
+    bool compare(const Value *V1, const Value *V2);
+
+    const Function *LHS, *RHS;
+    typedef DenseMap<const Value *, unsigned long> IDMap;
+    IDMap Map;
+    DenseMap<const Function *, IDMap> Domains;
+    DenseMap<const Function *, unsigned long> DomainCount;
+    TargetData *TD;
   };
 }
 
 char MergeFunctions::ID = 0;
-static RegisterPass<MergeFunctions>
-X("mergefunc", "Merge Functions");
+static RegisterPass<MergeFunctions> X("mergefunc", "Merge Functions");
 
 ModulePass *llvm::createMergeFunctionsPass() {
   return new MergeFunctions();
@@ -95,15 +135,6 @@ static unsigned long hash(const Function *F) {
   return ID.ComputeHash();
 }
 
-/// IgnoreBitcasts - given a bitcast, returns the first non-bitcast found by
-/// walking the chain of cast operands. Otherwise, returns the argument.
-static Value* IgnoreBitcasts(Value *V) {
-  while (BitCastInst *BC = dyn_cast<BitCastInst>(V))
-    V = BC->getOperand(0);
-
-  return V;
-}
-
 /// isEquivalentType - any two pointers are equivalent. Otherwise, standard
 /// type equivalence rules apply.
 static bool isEquivalentType(const Type *Ty1, const Type *Ty2) {
@@ -113,6 +144,14 @@ static bool isEquivalentType(const Type *Ty1, const Type *Ty2) {
     return false;
 
   switch(Ty1->getTypeID()) {
+  default:
+    llvm_unreachable("Unknown type!");
+    // Fall through in Release-Asserts mode.
+  case Type::IntegerTyID:
+  case Type::OpaqueTyID:
+    // Ty1 == Ty2 would have returned true earlier.
+    return false;
+
   case Type::VoidTyID:
   case Type::FloatTyID:
   case Type::DoubleTyID:
@@ -123,15 +162,6 @@ static bool isEquivalentType(const Type *Ty1, const Type *Ty2) {
   case Type::MetadataTyID:
     return true;
 
-  case Type::IntegerTyID:
-  case Type::OpaqueTyID:
-    // Ty1 == Ty2 would have returned true earlier.
-    return false;
-
-  default:
-    llvm_unreachable("Unknown type!");
-    return false;
-
   case Type::PointerTyID: {
     const PointerType *PTy1 = cast<PointerType>(Ty1);
     const PointerType *PTy2 = cast<PointerType>(Ty2);
@@ -154,6 +184,21 @@ static bool isEquivalentType(const Type *Ty1, const Type *Ty2) {
     return true;
   }
 
+  case Type::UnionTyID: {
+    const UnionType *UTy1 = cast<UnionType>(Ty1);
+    const UnionType *UTy2 = cast<UnionType>(Ty2);
+
+    // TODO: we could be fancy with union(A, union(A, B)) === union(A, B), etc.
+    if (UTy1->getNumElements() != UTy2->getNumElements())
+      return false;
+
+    for (unsigned i = 0, e = UTy1->getNumElements(); i != e; ++i) {
+      if (!isEquivalentType(UTy1->getElementType(i), UTy2->getElementType(i)))
+        return false;
+    }
+    return true;
+  }
+
   case Type::FunctionTyID: {
     const FunctionType *FTy1 = cast<FunctionType>(Ty1);
     const FunctionType *FTy2 = cast<FunctionType>(Ty2);
@@ -236,123 +281,136 @@ isEquivalentOperation(const Instruction *I1, const Instruction *I2) {
   return true;
 }
 
-static bool compare(const Value *V, const Value *U) {
-  assert(!isa<BasicBlock>(V) && !isa<BasicBlock>(U) &&
-         "Must not compare basic blocks.");
-
-  assert(isEquivalentType(V->getType(), U->getType()) &&
-        "Two of the same operation have operands of different type.");
+bool MergeFunctions::isEquivalentGEP(const GetElementPtrInst *GEP1,
+                                     const GetElementPtrInst *GEP2) {
+  if (TD && GEP1->hasAllConstantIndices() && GEP2->hasAllConstantIndices()) {
+    SmallVector<Value *, 8> Indices1, Indices2;
+    for (GetElementPtrInst::const_op_iterator I = GEP1->idx_begin(),
+           E = GEP1->idx_end(); I != E; ++I) {
+      Indices1.push_back(*I);
+    }
+    for (GetElementPtrInst::const_op_iterator I = GEP2->idx_begin(),
+           E = GEP2->idx_end(); I != E; ++I) {
+      Indices2.push_back(*I);
+    }
+    uint64_t Offset1 = TD->getIndexedOffset(GEP1->getPointerOperandType(),
+                                            Indices1.data(), Indices1.size());
+    uint64_t Offset2 = TD->getIndexedOffset(GEP2->getPointerOperandType(),
+                                            Indices2.data(), Indices2.size());
+    return Offset1 == Offset2;
+  }
 
-  // TODO: If the constant is an expression of F, we should accept that it's
-  // equal to the same expression in terms of G.
-  if (isa<Constant>(V))
-    return V == U;
+  // Equivalent types aren't enough.
+  if (GEP1->getPointerOperand()->getType() !=
+      GEP2->getPointerOperand()->getType())
+    return false;
 
-  // The caller has ensured that ValueMap[V] != U. Since Arguments are
-  // pre-loaded into the ValueMap, and Instructions are added as we go, we know
-  // that this can only be a mis-match.
-  if (isa<Instruction>(V) || isa<Argument>(V))
+  if (GEP1->getNumOperands() != GEP2->getNumOperands())
     return false;
 
-  if (isa<InlineAsm>(V) && isa<InlineAsm>(U)) {
-    const InlineAsm *IAF = cast<InlineAsm>(V);
-    const InlineAsm *IAG = cast<InlineAsm>(U);
-    return IAF->getAsmString() == IAG->getAsmString() &&
-           IAF->getConstraintString() == IAG->getConstraintString();
+  for (unsigned i = 0, e = GEP1->getNumOperands(); i != e; ++i) {
+    if (!compare(GEP1->getOperand(i), GEP2->getOperand(i)))
+      return false;
   }
 
-  return false;
+  return true;
 }
 
-static bool equals(const BasicBlock *BB1, const BasicBlock *BB2,
-                   DenseMap<const Value *, const Value *> &ValueMap,
-                   DenseMap<const Value *, const Value *> &SpeculationMap) {
-  // Speculatively add it anyways. If it's false, we'll notice a difference
-  // later, and this won't matter.
-  ValueMap[BB1] = BB2;
+bool MergeFunctions::compare(const Value *V1, const Value *V2) {
+  if (V1 == LHS || V1 == RHS)
+    if (V2 == LHS || V2 == RHS)
+      return true;
 
-  BasicBlock::const_iterator FI = BB1->begin(), FE = BB1->end();
-  BasicBlock::const_iterator GI = BB2->begin(), GE = BB2->end();
+  // TODO: constant expressions in terms of LHS and RHS
+  if (isa<Constant>(V1))
+    return V1 == V2;
 
-  do {
-    if (isa<BitCastInst>(FI)) {
-      ++FI;
-      continue;
-    }
-    if (isa<BitCastInst>(GI)) {
-      ++GI;
-      continue;
-    }
+  if (isa<InlineAsm>(V1) && isa<InlineAsm>(V2)) {
+    const InlineAsm *IA1 = cast<InlineAsm>(V1);
+    const InlineAsm *IA2 = cast<InlineAsm>(V2);
+    return IA1->getAsmString() == IA2->getAsmString() &&
+           IA1->getConstraintString() == IA2->getConstraintString();
+  }
 
-    if (!isEquivalentOperation(FI, GI))
-      return false;
+  // We enumerate constants globally and arguments, basic blocks or
+  // instructions within the function they belong to.
+  const Function *Domain1 = NULL;
+  if (const Argument *A = dyn_cast<Argument>(V1)) {
+    Domain1 = A->getParent();
+  } else if (const BasicBlock *BB = dyn_cast<BasicBlock>(V1)) {
+    Domain1 = BB->getParent();
+  } else if (const Instruction *I = dyn_cast<Instruction>(V1)) {
+    Domain1 = I->getParent()->getParent();
+  }
 
-    if (isa<GetElementPtrInst>(FI)) {
-      const GetElementPtrInst *GEPF = cast<GetElementPtrInst>(FI);
-      const GetElementPtrInst *GEPG = cast<GetElementPtrInst>(GI);
-      if (GEPF->hasAllZeroIndices() && GEPG->hasAllZeroIndices()) {
-        // It's effectively a bitcast.
-        ++FI, ++GI;
-        continue;
-      }
+  const Function *Domain2 = NULL;
+  if (const Argument *A = dyn_cast<Argument>(V2)) {
+    Domain2 = A->getParent();
+  } else if (const BasicBlock *BB = dyn_cast<BasicBlock>(V2)) {
+    Domain2 = BB->getParent();
+  } else if (const Instruction *I = dyn_cast<Instruction>(V2)) {
+    Domain2 = I->getParent()->getParent();
+  }
 
-      // TODO: we only really care about the elements before the index
-      if (FI->getOperand(0)->getType() != GI->getOperand(0)->getType())
+  if (Domain1 != Domain2)
+    if (Domain1 != LHS && Domain1 != RHS)
+      if (Domain2 != LHS && Domain2 != RHS)
         return false;
-    }
 
-    if (ValueMap[FI] == GI) {
-      ++FI, ++GI;
-      continue;
-    }
+  IDMap &Map1 = Domains[Domain1];
+  unsigned long &ID1 = Map1[V1];
+  if (!ID1)
+    ID1 = ++DomainCount[Domain1];
 
-    if (ValueMap[FI] != NULL)
-      return false;
+  IDMap &Map2 = Domains[Domain2];
+  unsigned long &ID2 = Map2[V2];
+  if (!ID2)
+    ID2 = ++DomainCount[Domain2];
 
-    for (unsigned i = 0, e = FI->getNumOperands(); i != e; ++i) {
-      Value *OpF = IgnoreBitcasts(FI->getOperand(i));
-      Value *OpG = IgnoreBitcasts(GI->getOperand(i));
+  return ID1 == ID2;
+}
 
-      if (ValueMap[OpF] == OpG)
-        continue;
+bool MergeFunctions::equals(const BasicBlock *BB1, const BasicBlock *BB2) {
+  BasicBlock::const_iterator FI = BB1->begin(), FE = BB1->end();
+  BasicBlock::const_iterator GI = BB2->begin(), GE = BB2->end();
 
-      if (ValueMap[OpF] != NULL)
+  do {
+    if (!compare(FI, GI))
+      return false;
+
+    if (isa<GetElementPtrInst>(FI) && isa<GetElementPtrInst>(GI)) {
+      const GetElementPtrInst *GEP1 = cast<GetElementPtrInst>(FI);
+      const GetElementPtrInst *GEP2 = cast<GetElementPtrInst>(GI);
+
+      if (!compare(GEP1->getPointerOperand(), GEP2->getPointerOperand()))
         return false;
 
-      if (OpF->getValueID() != OpG->getValueID() ||
-          !isEquivalentType(OpF->getType(), OpG->getType()))
+      if (!isEquivalentGEP(GEP1, GEP2))
+        return false;
+    } else {
+      if (!isEquivalentOperation(FI, GI))
         return false;
 
-      if (isa<PHINode>(FI)) {
-        if (SpeculationMap[OpF] == NULL)
-          SpeculationMap[OpF] = OpG;
-        else if (SpeculationMap[OpF] != OpG)
-          return false;
-        continue;
-      } else if (isa<BasicBlock>(OpF)) {
-        assert(isa<TerminatorInst>(FI) &&
-               "BasicBlock referenced by non-Terminator non-PHI");
-        // This call changes the ValueMap, hence we can't use
-        // Value *& = ValueMap[...]
-        if (!equals(cast<BasicBlock>(OpF), cast<BasicBlock>(OpG), ValueMap,
-                    SpeculationMap))
-          return false;
-      } else {
+      for (unsigned i = 0, e = FI->getNumOperands(); i != e; ++i) {
+        Value *OpF = FI->getOperand(i);
+        Value *OpG = GI->getOperand(i);
+
         if (!compare(OpF, OpG))
           return false;
-      }
 
-      ValueMap[OpF] = OpG;
+        if (OpF->getValueID() != OpG->getValueID() ||
+            !isEquivalentType(OpF->getType(), OpG->getType()))
+          return false;
+      }
     }
 
-    ValueMap[FI] = GI;
     ++FI, ++GI;
   } while (FI != FE && GI != GE);
 
   return FI == FE && GI == GE;
 }
 
-static bool equals(const Function *F, const Function *G) {
+bool MergeFunctions::equals(const Function *F, const Function *G) {
   // We need to recheck everything, but check the things that weren't included
   // in the hash first.
 
@@ -382,27 +440,46 @@ static bool equals(const Function *F, const Function *G) {
   if (!isEquivalentType(F->getFunctionType(), G->getFunctionType()))
     return false;
 
-  DenseMap<const Value *, const Value *> ValueMap;
-  DenseMap<const Value *, const Value *> SpeculationMap;
-  ValueMap[F] = G;
-
   assert(F->arg_size() == G->arg_size() &&
          "Identical functions have a different number of args.");
 
-  for (Function::const_arg_iterator fi = F->arg_begin(), gi = G->arg_begin(),
-         fe = F->arg_end(); fi != fe; ++fi, ++gi)
-    ValueMap[fi] = gi;
+  LHS = F;
+  RHS = G;
 
-  if (!equals(&F->getEntryBlock(), &G->getEntryBlock(), ValueMap,
-              SpeculationMap))
-    return false;
+  // Visit the arguments so that they get enumerated in the order they're
+  // passed in.
+  for (Function::const_arg_iterator fi = F->arg_begin(), gi = G->arg_begin(),
+         fe = F->arg_end(); fi != fe; ++fi, ++gi) {
+    if (!compare(fi, gi))
+      llvm_unreachable("Arguments repeat");
+  }
 
-  for (DenseMap<const Value *, const Value *>::iterator
-         I = SpeculationMap.begin(), E = SpeculationMap.end(); I != E; ++I) {
-    if (ValueMap[I->first] != I->second)
+  SmallVector<const BasicBlock *, 8> FBBs, GBBs;
+  SmallSet<const BasicBlock *, 128> VisitedBBs; // in terms of F.
+  FBBs.push_back(&F->getEntryBlock());
+  GBBs.push_back(&G->getEntryBlock());
+  VisitedBBs.insert(FBBs[0]);
+  while (!FBBs.empty()) {
+    const BasicBlock *FBB = FBBs.pop_back_val();
+    const BasicBlock *GBB = GBBs.pop_back_val();
+    if (!compare(FBB, GBB) || !equals(FBB, GBB)) {
+      Domains.clear();
+      DomainCount.clear();
       return false;
+    }
+    const TerminatorInst *FTI = FBB->getTerminator();
+    const TerminatorInst *GTI = GBB->getTerminator();
+    assert(FTI->getNumSuccessors() == GTI->getNumSuccessors());
+    for (unsigned i = 0, e = FTI->getNumSuccessors(); i != e; ++i) {
+      if (!VisitedBBs.insert(FTI->getSuccessor(i)))
+        continue;
+      FBBs.push_back(FTI->getSuccessor(i));
+      GBBs.push_back(GTI->getSuccessor(i));
+    }
   }
 
+  Domains.clear();
+  DomainCount.clear();
   return true;
 }
 
@@ -476,20 +553,32 @@ static LinkageCategory categorize(const Function *F) {
 }
 
 static void ThunkGToF(Function *F, Function *G) {
+  if (!G->mayBeOverridden()) {
+    // Redirect direct callers of G to F.
+    Constant *BitcastF = ConstantExpr::getBitCast(F, G->getType());
+    for (Value::use_iterator UI = G->use_begin(), UE = G->use_end();
+         UI != UE;) {
+      Value::use_iterator TheIter = UI;
+      ++UI;
+      CallSite CS(*TheIter);
+      if (CS && CS.isCallee(TheIter))
+        TheIter.getUse().set(BitcastF);
+    }
+  }
+
   Function *NewG = Function::Create(G->getFunctionType(), G->getLinkage(), "",
                                     G->getParent());
   BasicBlock *BB = BasicBlock::Create(F->getContext(), "", NewG);
 
-  std::vector<Value *> Args;
+  SmallVector<Value *, 16> Args;
   unsigned i = 0;
   const FunctionType *FFTy = F->getFunctionType();
   for (Function::arg_iterator AI = NewG->arg_begin(), AE = NewG->arg_end();
        AI != AE; ++AI) {
-    if (FFTy->getParamType(i) == AI->getType())
+    if (FFTy->getParamType(i) == AI->getType()) {
       Args.push_back(AI);
-    else {
-      Value *BCI = new BitCastInst(AI, FFTy->getParamType(i), "", BB);
-      Args.push_back(BCI);
+    } else {
+      Args.push_back(new BitCastInst(AI, FFTy->getParamType(i), "", BB));
     }
     ++i;
   }
@@ -510,8 +599,6 @@ static void ThunkGToF(Function *F, Function *G) {
   NewG->takeName(G);
   G->replaceAllUsesWith(NewG);
   G->eraseFromParent();
-
-  // TODO: look at direct callers to G and make them all direct callers to F.
 }
 
 static void AliasGToF(Function *F, Function *G) {
@@ -542,67 +629,66 @@ static bool fold(std::vector<Function *> &FnVec, unsigned i, unsigned j) {
   }
 
   switch (catF) {
+  case ExternalStrong:
+    switch (catG) {
     case ExternalStrong:
-      switch (catG) {
-        case ExternalStrong:
-        case ExternalWeak:
-          ThunkGToF(F, G);
-          break;
-        case Internal:
-          if (G->hasAddressTaken())
-            ThunkGToF(F, G);
-          else
-            AliasGToF(F, G);
-          break;
-      }
+    case ExternalWeak:
+      ThunkGToF(F, G);
       break;
+    case Internal:
+      if (G->hasAddressTaken())
+        ThunkGToF(F, G);
+      else
+        AliasGToF(F, G);
+      break;
+    }
+    break;
 
-    case ExternalWeak: {
-      assert(catG == ExternalWeak);
+  case ExternalWeak: {
+    assert(catG == ExternalWeak);
 
-      // Make them both thunks to the same internal function.
-      F->setAlignment(std::max(F->getAlignment(), G->getAlignment()));
-      Function *H = Function::Create(F->getFunctionType(), F->getLinkage(), "",
-                                     F->getParent());
-      H->copyAttributesFrom(F);
-      H->takeName(F);
-      F->replaceAllUsesWith(H);
+    // Make them both thunks to the same internal function.
+    F->setAlignment(std::max(F->getAlignment(), G->getAlignment()));
+    Function *H = Function::Create(F->getFunctionType(), F->getLinkage(), "",
+                                   F->getParent());
+    H->copyAttributesFrom(F);
+    H->takeName(F);
+    F->replaceAllUsesWith(H);
 
-      ThunkGToF(F, G);
-      ThunkGToF(F, H);
+    ThunkGToF(F, G);
+    ThunkGToF(F, H);
 
-      F->setLinkage(GlobalValue::InternalLinkage);
-    } break;
+    F->setLinkage(GlobalValue::InternalLinkage);
+  } break;
 
-    case Internal:
-      switch (catG) {
-        case ExternalStrong:
-          llvm_unreachable(0);
-          // fall-through
-        case ExternalWeak:
-          if (F->hasAddressTaken())
-            ThunkGToF(F, G);
-          else
-            AliasGToF(F, G);
-          break;
-        case Internal: {
-          bool addrTakenF = F->hasAddressTaken();
-          bool addrTakenG = G->hasAddressTaken();
-          if (!addrTakenF && addrTakenG) {
-            std::swap(FnVec[i], FnVec[j]);
-            std::swap(F, G);
-            std::swap(addrTakenF, addrTakenG);
-          }
+  case Internal:
+    switch (catG) {
+    case ExternalStrong:
+      llvm_unreachable(0);
+      // fall-through
+    case ExternalWeak:
+      if (F->hasAddressTaken())
+        ThunkGToF(F, G);
+      else
+        AliasGToF(F, G);
+      break;
+    case Internal: {
+      bool addrTakenF = F->hasAddressTaken();
+      bool addrTakenG = G->hasAddressTaken();
+      if (!addrTakenF && addrTakenG) {
+        std::swap(FnVec[i], FnVec[j]);
+        std::swap(F, G);
+        std::swap(addrTakenF, addrTakenG);
+      }
 
-          if (addrTakenF && addrTakenG) {
-            ThunkGToF(F, G);
-          } else {
-            assert(!addrTakenG);
-            AliasGToF(F, G);
-          }
-        } break;
+      if (addrTakenF && addrTakenG) {
+        ThunkGToF(F, G);
+      } else {
+        assert(!addrTakenG);
+        AliasGToF(F, G);
       }
-      break;
+    } break;
+  } break;
   }
 
   ++NumFunctionsMerged;
@@ -619,22 +705,20 @@ bool MergeFunctions::runOnModule(Module &M) {
   std::map<unsigned long, std::vector<Function *> > FnMap;
 
   for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
-    if (F->isDeclaration() || F->isIntrinsic())
+    if (F->isDeclaration())
       continue;
 
     FnMap[hash(F)].push_back(F);
   }
 
-  // TODO: instead of running in a loop, we could also fold functions in
-  // callgraph order. Constructing the CFG probably isn't cheaper than just
-  // running in a loop, unless it happened to already be available.
+  TD = getAnalysisIfAvailable<TargetData>();
 
   bool LocalChanged;
   do {
     LocalChanged = false;
     DEBUG(dbgs() << "size: " << FnMap.size() << "\n");
     for (std::map<unsigned long, std::vector<Function *> >::iterator
-         I = FnMap.begin(), E = FnMap.end(); I != E; ++I) {
+           I = FnMap.begin(), E = FnMap.end(); I != E; ++I) {
       std::vector<Function *> &FnVec = I->second;
       DEBUG(dbgs() << "hash (" << I->first << "): " << FnVec.size() << "\n");
 
diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp
index 310e4a2..6bc8e66 100644
--- a/lib/Transforms/IPO/StripSymbols.cpp
+++ b/lib/Transforms/IPO/StripSymbols.cpp
@@ -229,6 +229,12 @@ static bool StripDebugInfo(Module &M) {
     NMD->eraseFromParent();
   }
   
+  NMD = M.getNamedMetadata("llvm.dbg.lv");
+  if (NMD) {
+    Changed = true;
+    NMD->eraseFromParent();
+  }
+  
   unsigned MDDbgKind = M.getMDKindID("dbg");
   for (Module::iterator MI = M.begin(), ME = M.end(); MI != ME; ++MI) 
     for (Function::iterator FI = MI->begin(), FE = MI->end(); FI != FE;
diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h
index bd06499..c7b04a4 100644
--- a/lib/Transforms/InstCombine/InstCombine.h
+++ b/lib/Transforms/InstCombine/InstCombine.h
@@ -51,7 +51,7 @@ static inline unsigned getComplexity(Value *V) {
 /// InstCombineIRInserter - This is an IRBuilder insertion helper that works
 /// just like the normal insertion helper, but also adds any new instructions
 /// to the instcombine worklist.
-class VISIBILITY_HIDDEN InstCombineIRInserter 
+class LLVM_LIBRARY_VISIBILITY InstCombineIRInserter 
     : public IRBuilderDefaultInserter<true> {
   InstCombineWorklist &Worklist;
 public:
@@ -65,7 +65,7 @@ public:
 };
   
 /// InstCombiner - The -instcombine pass.
-class VISIBILITY_HIDDEN InstCombiner
+class LLVM_LIBRARY_VISIBILITY InstCombiner
                              : public FunctionPass,
                                public InstVisitor<InstCombiner, Instruction*> {
   TargetData *TD;
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp
index eb7628e..b0137c4 100644
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -442,7 +442,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
     // If this cast is a truncate, evaluting in a different type always
     // eliminates the cast, so it is always a win.
     DEBUG(dbgs() << "ICE: EvaluateInDifferentType converting expression type"
-          " to avoid cast: " << CI);
+          " to avoid cast: " << CI << '\n');
     Value *Res = EvaluateInDifferentType(Src, DestTy, false);
     assert(Res->getType() == DestTy);
     return ReplaceInstUsesWith(CI, Res);
@@ -1252,6 +1252,64 @@ Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) {
   return commonPointerCastTransforms(CI);
 }
 
+/// OptimizeVectorResize - This input value (which is known to have vector type)
+/// is being zero extended or truncated to the specified vector type.  Try to
+/// replace it with a shuffle (and vector/vector bitcast) if possible.
+///
+/// The source and destination vector types may have different element types.
+static Instruction *OptimizeVectorResize(Value *InVal, const VectorType *DestTy,
+                                         InstCombiner &IC) {
+  // We can only do this optimization if the output is a multiple of the input
+  // element size, or the input is a multiple of the output element size.
+  // Convert the input type to have the same element type as the output.
+  const VectorType *SrcTy = cast<VectorType>(InVal->getType());
+  
+  if (SrcTy->getElementType() != DestTy->getElementType()) {
+    // The input types don't need to be identical, but for now they must be the
+    // same size.  There is no specific reason we couldn't handle things like
+    // <4 x i16> -> <4 x i32> by bitcasting to <2 x i32> but haven't gotten
+    // there yet. 
+    if (SrcTy->getElementType()->getPrimitiveSizeInBits() !=
+        DestTy->getElementType()->getPrimitiveSizeInBits())
+      return 0;
+    
+    SrcTy = VectorType::get(DestTy->getElementType(), SrcTy->getNumElements());
+    InVal = IC.Builder->CreateBitCast(InVal, SrcTy);
+  }
+  
+  // Now that the element types match, get the shuffle mask and RHS of the
+  // shuffle to use, which depends on whether we're increasing or decreasing the
+  // size of the input.
+  SmallVector<Constant*, 16> ShuffleMask;
+  Value *V2;
+  const IntegerType *Int32Ty = Type::getInt32Ty(SrcTy->getContext());
+  
+  if (SrcTy->getNumElements() > DestTy->getNumElements()) {
+    // If we're shrinking the number of elements, just shuffle in the low
+    // elements from the input and use undef as the second shuffle input.
+    V2 = UndefValue::get(SrcTy);
+    for (unsigned i = 0, e = DestTy->getNumElements(); i != e; ++i)
+      ShuffleMask.push_back(ConstantInt::get(Int32Ty, i));
+    
+  } else {
+    // If we're increasing the number of elements, shuffle in all of the
+    // elements from InVal and fill the rest of the result elements with zeros
+    // from a constant zero.
+    V2 = Constant::getNullValue(SrcTy);
+    unsigned SrcElts = SrcTy->getNumElements();
+    for (unsigned i = 0, e = SrcElts; i != e; ++i)
+      ShuffleMask.push_back(ConstantInt::get(Int32Ty, i));
+
+    // The excess elements reference the first element of the zero input.
+    ShuffleMask.append(DestTy->getNumElements()-SrcElts,
+                       ConstantInt::get(Int32Ty, SrcElts));
+  }
+  
+  Constant *Mask = ConstantVector::get(ShuffleMask.data(), ShuffleMask.size());
+  return new ShuffleVectorInst(InVal, V2, Mask);
+}
+
+
 Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
   // If the operands are integer typed then apply the integer transforms,
   // otherwise just apply the common ones.
@@ -1310,6 +1368,18 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
                      Constant::getNullValue(Type::getInt32Ty(CI.getContext())));
       // FIXME: Canonicalize bitcast(insertelement) -> insertelement(bitcast)
     }
+    
+    // If this is a cast from an integer to vector, check to see if the input
+    // is a trunc or zext of a bitcast from vector.  If so, we can replace all
+    // the casts with a shuffle and (potentially) a bitcast.
+    if (isa<IntegerType>(SrcTy) && (isa<TruncInst>(Src) || isa<ZExtInst>(Src))){
+      CastInst *SrcCast = cast<CastInst>(Src);
+      if (BitCastInst *BCIn = dyn_cast<BitCastInst>(SrcCast->getOperand(0)))
+        if (isa<VectorType>(BCIn->getOperand(0)->getType()))
+          if (Instruction *I = OptimizeVectorResize(BCIn->getOperand(0),
+                                               cast<VectorType>(DestTy), *this))
+            return I;
+    }
   }
 
   if (const VectorType *SrcVTy = dyn_cast<VectorType>(SrcTy)) {
diff --git a/lib/Transforms/InstCombine/InstCombineWorklist.h b/lib/Transforms/InstCombine/InstCombineWorklist.h
index 9d88621..9100a85 100644
--- a/lib/Transforms/InstCombine/InstCombineWorklist.h
+++ b/lib/Transforms/InstCombine/InstCombineWorklist.h
@@ -22,7 +22,7 @@ namespace llvm {
   
 /// InstCombineWorklist - This is the worklist management logic for
 /// InstCombine.
-class VISIBILITY_HIDDEN InstCombineWorklist {
+class LLVM_LIBRARY_VISIBILITY InstCombineWorklist {
   SmallVector<Instruction*, 256> Worklist;
   DenseMap<Instruction*, unsigned> WorklistMap;
   
diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt
index 5778864..1a3b10c 100644
--- a/lib/Transforms/Scalar/CMakeLists.txt
+++ b/lib/Transforms/Scalar/CMakeLists.txt
@@ -26,6 +26,7 @@ add_llvm_library(LLVMScalarOpts
   SimplifyCFGPass.cpp
   SimplifyHalfPowrLibCalls.cpp
   SimplifyLibCalls.cpp
+  Sink.cpp
   TailDuplication.cpp
   TailRecursionElimination.cpp
   )
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index 65b34b1..ca8ab49 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -868,7 +868,7 @@ static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
   
   const Type *StoredValTy = StoredVal->getType();
   
-  uint64_t StoreSize = TD.getTypeSizeInBits(StoredValTy);
+  uint64_t StoreSize = TD.getTypeStoreSizeInBits(StoredValTy);
   uint64_t LoadSize = TD.getTypeSizeInBits(LoadedTy);
   
   // If the store and reload are the same size, we can always reuse it.
@@ -1132,8 +1132,8 @@ static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset,
                                    Instruction *InsertPt, const TargetData &TD){
   LLVMContext &Ctx = SrcVal->getType()->getContext();
   
-  uint64_t StoreSize = TD.getTypeSizeInBits(SrcVal->getType())/8;
-  uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy)/8;
+  uint64_t StoreSize = (TD.getTypeSizeInBits(SrcVal->getType()) + 7) / 8;
+  uint64_t LoadSize = (TD.getTypeSizeInBits(LoadTy) + 7) / 8;
   
   IRBuilder<> Builder(InsertPt->getParent(), InsertPt);
   
@@ -1604,7 +1604,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
     }
   }
   if (!NeedToSplit.empty()) {
-    toSplit.append(NeedToSplit.size(), NeedToSplit.front());
+    toSplit.append(NeedToSplit.begin(), NeedToSplit.end());
     return false;
   }
 
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index cf3d16f..86ea3eb 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -107,11 +107,13 @@ namespace {
 class RegUseTracker {
   typedef DenseMap<const SCEV *, RegSortData> RegUsesTy;
 
-  RegUsesTy RegUses;
+  RegUsesTy RegUsesMap;
   SmallVector<const SCEV *, 16> RegSequence;
 
 public:
   void CountRegister(const SCEV *Reg, size_t LUIdx);
+  void DropRegister(const SCEV *Reg, size_t LUIdx);
+  void DropUse(size_t LUIdx);
 
   bool isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const;
 
@@ -132,7 +134,7 @@ public:
 void
 RegUseTracker::CountRegister(const SCEV *Reg, size_t LUIdx) {
   std::pair<RegUsesTy::iterator, bool> Pair =
-    RegUses.insert(std::make_pair(Reg, RegSortData()));
+    RegUsesMap.insert(std::make_pair(Reg, RegSortData()));
   RegSortData &RSD = Pair.first->second;
   if (Pair.second)
     RegSequence.push_back(Reg);
@@ -140,11 +142,28 @@ RegUseTracker::CountRegister(const SCEV *Reg, size_t LUIdx) {
   RSD.UsedByIndices.set(LUIdx);
 }
 
+void
+RegUseTracker::DropRegister(const SCEV *Reg, size_t LUIdx) {
+  RegUsesTy::iterator It = RegUsesMap.find(Reg);
+  assert(It != RegUsesMap.end());
+  RegSortData &RSD = It->second;
+  assert(RSD.UsedByIndices.size() > LUIdx);
+  RSD.UsedByIndices.reset(LUIdx);
+}
+
+void
+RegUseTracker::DropUse(size_t LUIdx) {
+  // Remove the use index from every register's use list.
+  for (RegUsesTy::iterator I = RegUsesMap.begin(), E = RegUsesMap.end();
+       I != E; ++I)
+    I->second.UsedByIndices.reset(LUIdx);
+}
+
 bool
 RegUseTracker::isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const {
-  if (!RegUses.count(Reg)) return false;
+  if (!RegUsesMap.count(Reg)) return false;
   const SmallBitVector &UsedByIndices =
-    RegUses.find(Reg)->second.UsedByIndices;
+    RegUsesMap.find(Reg)->second.UsedByIndices;
   int i = UsedByIndices.find_first();
   if (i == -1) return false;
   if ((size_t)i != LUIdx) return true;
@@ -152,13 +171,13 @@ RegUseTracker::isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const {
 }
 
 const SmallBitVector &RegUseTracker::getUsedByIndices(const SCEV *Reg) const {
-  RegUsesTy::const_iterator I = RegUses.find(Reg);
-  assert(I != RegUses.end() && "Unknown register!");
+  RegUsesTy::const_iterator I = RegUsesMap.find(Reg);
+  assert(I != RegUsesMap.end() && "Unknown register!");
   return I->second.UsedByIndices;
 }
 
 void RegUseTracker::clear() {
-  RegUses.clear();
+  RegUsesMap.clear();
   RegSequence.clear();
 }
 
@@ -188,6 +207,8 @@ struct Formula {
   unsigned getNumRegs() const;
   const Type *getType() const;
 
+  void DeleteBaseReg(const SCEV *&S);
+
   bool referencesReg(const SCEV *S) const;
   bool hasRegsUsedByUsesOtherThan(size_t LUIdx,
                                   const RegUseTracker &RegUses) const;
@@ -291,6 +312,13 @@ const Type *Formula::getType() const {
          0;
 }
 
+/// DeleteBaseReg - Delete the given base reg from the BaseRegs list.
+void Formula::DeleteBaseReg(const SCEV *&S) {
+  if (&S != &BaseRegs.back())
+    std::swap(S, BaseRegs.back());
+  BaseRegs.pop_back();
+}
+
 /// referencesReg - Test if this formula references the given register.
 bool Formula::referencesReg(const SCEV *S) const {
   return S == ScaledReg ||
@@ -326,6 +354,13 @@ void Formula::print(raw_ostream &OS) const {
     if (!First) OS << " + "; else First = false;
     OS << "reg(" << **I << ')';
   }
+  if (AM.HasBaseReg && BaseRegs.empty()) {
+    if (!First) OS << " + "; else First = false;
+    OS << "**error: HasBaseReg**";
+  } else if (!AM.HasBaseReg && !BaseRegs.empty()) {
+    if (!First) OS << " + "; else First = false;
+    OS << "**error: !HasBaseReg**";
+  }
   if (AM.Scale != 0) {
     if (!First) OS << " + "; else First = false;
     OS << AM.Scale << "*reg(";
@@ -345,8 +380,7 @@ void Formula::dump() const {
 /// without changing its value.
 static bool isAddRecSExtable(const SCEVAddRecExpr *AR, ScalarEvolution &SE) {
   const Type *WideTy =
-    IntegerType::get(SE.getContext(),
-                     SE.getTypeSizeInBits(AR->getType()) + 1);
+    IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(AR->getType()) + 1);
   return isa<SCEVAddRecExpr>(SE.getSignExtendExpr(AR, WideTy));
 }
 
@@ -354,8 +388,7 @@ static bool isAddRecSExtable(const SCEVAddRecExpr *AR, ScalarEvolution &SE) {
 /// without changing its value.
 static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE) {
   const Type *WideTy =
-    IntegerType::get(SE.getContext(),
-                     SE.getTypeSizeInBits(A->getType()) + 1);
+    IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(A->getType()) + 1);
   return isa<SCEVAddExpr>(SE.getSignExtendExpr(A, WideTy));
 }
 
@@ -363,8 +396,7 @@ static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE) {
 /// without changing its value.
 static bool isMulSExtable(const SCEVMulExpr *A, ScalarEvolution &SE) {
   const Type *WideTy =
-    IntegerType::get(SE.getContext(),
-                     SE.getTypeSizeInBits(A->getType()) + 1);
+    IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(A->getType()) + 1);
   return isa<SCEVMulExpr>(SE.getSignExtendExpr(A, WideTy));
 }
 
@@ -432,14 +464,14 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
       bool Found = false;
       for (SCEVMulExpr::op_iterator I = Mul->op_begin(), E = Mul->op_end();
            I != E; ++I) {
+        const SCEV *S = *I;
         if (!Found)
-          if (const SCEV *Q = getExactSDiv(*I, RHS, SE,
+          if (const SCEV *Q = getExactSDiv(S, RHS, SE,
                                            IgnoreSignificantBits)) {
-            Ops.push_back(Q);
+            S = Q;
             Found = true;
-            continue;
           }
-        Ops.push_back(*I);
+        Ops.push_back(S);
       }
       return Found ? SE.getMulExpr(Ops) : 0;
     }
@@ -810,8 +842,7 @@ struct LSRFixup {
 }
 
 LSRFixup::LSRFixup()
-  : UserInst(0), OperandValToReplace(0),
-    LUIdx(~size_t(0)), Offset(0) {}
+  : UserInst(0), OperandValToReplace(0), LUIdx(~size_t(0)), Offset(0) {}
 
 /// isUseFullyOutsideLoop - Test whether this fixup always uses its
 /// value outside of the given loop.
@@ -934,7 +965,10 @@ public:
                                       MaxOffset(INT64_MIN),
                                       AllFixupsOutsideLoop(true) {}
 
+  bool HasFormulaWithSameRegs(const Formula &F) const;
   bool InsertFormula(const Formula &F);
+  void DeleteFormula(Formula &F);
+  void RecomputeRegs(size_t LUIdx, RegUseTracker &Reguses);
 
   void check() const;
 
@@ -942,6 +976,16 @@ public:
   void dump() const;
 };
 
+/// HasFormula - Test whether this use as a formula which has the same
+/// registers as the given formula.
+bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const {
+  SmallVector<const SCEV *, 2> Key = F.BaseRegs;
+  if (F.ScaledReg) Key.push_back(F.ScaledReg);
+  // Unstable sort by host order ok, because this is only used for uniquifying.
+  std::sort(Key.begin(), Key.end());
+  return Uniquifier.count(Key);
+}
+
 /// InsertFormula - If the given formula has not yet been inserted, add it to
 /// the list, and return true. Return false otherwise.
 bool LSRUse::InsertFormula(const Formula &F) {
@@ -972,6 +1016,33 @@ bool LSRUse::InsertFormula(const Formula &F) {
   return true;
 }
 
+/// DeleteFormula - Remove the given formula from this use's list.
+void LSRUse::DeleteFormula(Formula &F) {
+  if (&F != &Formulae.back())
+    std::swap(F, Formulae.back());
+  Formulae.pop_back();
+  assert(!Formulae.empty() && "LSRUse has no formulae left!");
+}
+
+/// RecomputeRegs - Recompute the Regs field, and update RegUses.
+void LSRUse::RecomputeRegs(size_t LUIdx, RegUseTracker &RegUses) {
+  // Now that we've filtered out some formulae, recompute the Regs set.
+  SmallPtrSet<const SCEV *, 4> OldRegs = Regs;
+  Regs.clear();
+  for (SmallVectorImpl<Formula>::const_iterator I = Formulae.begin(),
+       E = Formulae.end(); I != E; ++I) {
+    const Formula &F = *I;
+    if (F.ScaledReg) Regs.insert(F.ScaledReg);
+    Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
+  }
+
+  // Update the RegTracker.
+  for (SmallPtrSet<const SCEV *, 4>::iterator I = OldRegs.begin(),
+       E = OldRegs.end(); I != E; ++I)
+    if (!Regs.count(*I))
+      RegUses.DropRegister(*I, LUIdx);
+}
+
 void LSRUse::print(raw_ostream &OS) const {
   OS << "LSR Use: Kind=";
   switch (Kind) {
@@ -1091,6 +1162,13 @@ static bool isAlwaysFoldable(int64_t BaseOffs,
   AM.HasBaseReg = HasBaseReg;
   AM.Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
 
+  // Canonicalize a scale of 1 to a base register if the formula doesn't
+  // already have a base register.
+  if (!AM.HasBaseReg && AM.Scale == 1) {
+    AM.Scale = 0;
+    AM.HasBaseReg = true;
+  }
+
   return isLegalUse(AM, Kind, AccessTy, TLI);
 }
 
@@ -1186,7 +1264,7 @@ class LSRInstance {
   void OptimizeShadowIV();
   bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse);
   ICmpInst *OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse);
-  bool OptimizeLoopTermCond();
+  void OptimizeLoopTermCond();
 
   void CollectInterestingTypesAndFactors();
   void CollectFixupsAndInitialFormulae();
@@ -1200,13 +1278,17 @@ class LSRInstance {
   typedef DenseMap<const SCEV *, size_t> UseMapTy;
   UseMapTy UseMap;
 
-  bool reconcileNewOffset(LSRUse &LU, int64_t NewOffset,
+  bool reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
                           LSRUse::KindType Kind, const Type *AccessTy);
 
   std::pair<size_t, int64_t> getUse(const SCEV *&Expr,
                                     LSRUse::KindType Kind,
                                     const Type *AccessTy);
 
+  void DeleteUse(LSRUse &LU);
+
+  LSRUse *FindUseWithSimilarFormula(const Formula &F, const LSRUse &OrigLU);
+
 public:
   void InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
   void InsertSupplementalFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
@@ -1227,6 +1309,8 @@ public:
   void GenerateAllReuseFormulae();
 
   void FilterOutUndesirableDedicatedRegisters();
+
+  size_t EstimateSearchSpaceComplexity() const;
   void NarrowSearchSpaceUsingHeuristics();
 
   void SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
@@ -1375,6 +1459,7 @@ void LSRInstance::OptimizeShadowIV() {
     /* Remove cast operation */
     ShadowUse->replaceAllUsesWith(NewPH);
     ShadowUse->eraseFromParent();
+    Changed = true;
     break;
   }
 }
@@ -1382,8 +1467,7 @@ void LSRInstance::OptimizeShadowIV() {
 /// FindIVUserForCond - If Cond has an operand that is an expression of an IV,
 /// set the IV user and stride information and return true, otherwise return
 /// false.
-bool LSRInstance::FindIVUserForCond(ICmpInst *Cond,
-                                    IVStrideUse *&CondUse) {
+bool LSRInstance::FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse) {
   for (IVUsers::iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI)
     if (UI->getUser() == Cond) {
       // NOTE: we could handle setcc instructions with multiple uses here, but
@@ -1555,7 +1639,7 @@ ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) {
 
 /// OptimizeLoopTermCond - Change loop terminating condition to use the
 /// postinc iv when possible.
-bool
+void
 LSRInstance::OptimizeLoopTermCond() {
   SmallPtrSet<Instruction *, 4> PostIncs;
 
@@ -1621,13 +1705,13 @@ LSRInstance::OptimizeLoopTermCond() {
           }
           if (const SCEVConstant *D =
                 dyn_cast_or_null<SCEVConstant>(getExactSDiv(B, A, SE))) {
+            const ConstantInt *C = D->getValue();
             // Stride of one or negative one can have reuse with non-addresses.
-            if (D->getValue()->isOne() ||
-                D->getValue()->isAllOnesValue())
+            if (C->isOne() || C->isAllOnesValue())
               goto decline_post_inc;
             // Avoid weird situations.
-            if (D->getValue()->getValue().getMinSignedBits() >= 64 ||
-                D->getValue()->getValue().isMinSignedValue())
+            if (C->getValue().getMinSignedBits() >= 64 ||
+                C->getValue().isMinSignedValue())
               goto decline_post_inc;
             // Without TLI, assume that any stride might be valid, and so any
             // use might be shared.
@@ -1636,7 +1720,7 @@ LSRInstance::OptimizeLoopTermCond() {
             // Check for possible scaled-address reuse.
             const Type *AccessTy = getAccessType(UI->getUser());
             TargetLowering::AddrMode AM;
-            AM.Scale = D->getValue()->getSExtValue();
+            AM.Scale = C->getSExtValue();
             if (TLI->isLegalAddressingMode(AM, AccessTy))
               goto decline_post_inc;
             AM.Scale = -AM.Scale;
@@ -1691,12 +1775,13 @@ LSRInstance::OptimizeLoopTermCond() {
     else if (BB != IVIncInsertPos->getParent())
       IVIncInsertPos = BB->getTerminator();
   }
-
-  return Changed;
 }
 
+/// reconcileNewOffset - Determine if the given use can accomodate a fixup
+/// at the given offset and other details. If so, update the use and
+/// return true.
 bool
-LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset,
+LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
                                 LSRUse::KindType Kind, const Type *AccessTy) {
   int64_t NewMinOffset = LU.MinOffset;
   int64_t NewMaxOffset = LU.MaxOffset;
@@ -1709,12 +1794,12 @@ LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset,
     return false;
   // Conservatively assume HasBaseReg is true for now.
   if (NewOffset < LU.MinOffset) {
-    if (!isAlwaysFoldable(LU.MaxOffset - NewOffset, 0, /*HasBaseReg=*/true,
+    if (!isAlwaysFoldable(LU.MaxOffset - NewOffset, 0, HasBaseReg,
                           Kind, AccessTy, TLI))
       return false;
     NewMinOffset = NewOffset;
   } else if (NewOffset > LU.MaxOffset) {
-    if (!isAlwaysFoldable(NewOffset - LU.MinOffset, 0, /*HasBaseReg=*/true,
+    if (!isAlwaysFoldable(NewOffset - LU.MinOffset, 0, HasBaseReg,
                           Kind, AccessTy, TLI))
       return false;
     NewMaxOffset = NewOffset;
@@ -1753,7 +1838,7 @@ LSRInstance::getUse(const SCEV *&Expr,
     // A use already existed with this base.
     size_t LUIdx = P.first->second;
     LSRUse &LU = Uses[LUIdx];
-    if (reconcileNewOffset(LU, Offset, Kind, AccessTy))
+    if (reconcileNewOffset(LU, Offset, /*HasBaseReg=*/true, Kind, AccessTy))
       // Reuse this use.
       return std::make_pair(LUIdx, Offset);
   }
@@ -1774,6 +1859,47 @@ LSRInstance::getUse(const SCEV *&Expr,
   return std::make_pair(LUIdx, Offset);
 }
 
+/// DeleteUse - Delete the given use from the Uses list.
+void LSRInstance::DeleteUse(LSRUse &LU) {
+  if (&LU != &Uses.back())
+    std::swap(LU, Uses.back());
+  Uses.pop_back();
+}
+
+/// FindUseWithFormula - Look for a use distinct from OrigLU which is has
+/// a formula that has the same registers as the given formula.
+LSRUse *
+LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF,
+                                       const LSRUse &OrigLU) {
+  // Search all uses for the formula. This could be more clever. Ignore
+  // ICmpZero uses because they may contain formulae generated by
+  // GenerateICmpZeroScales, in which case adding fixup offsets may
+  // be invalid.
+  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
+    LSRUse &LU = Uses[LUIdx];
+    if (&LU != &OrigLU &&
+        LU.Kind != LSRUse::ICmpZero &&
+        LU.Kind == OrigLU.Kind && OrigLU.AccessTy == LU.AccessTy &&
+        LU.HasFormulaWithSameRegs(OrigF)) {
+      for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(),
+           E = LU.Formulae.end(); I != E; ++I) {
+        const Formula &F = *I;
+        if (F.BaseRegs == OrigF.BaseRegs &&
+            F.ScaledReg == OrigF.ScaledReg &&
+            F.AM.BaseGV == OrigF.AM.BaseGV &&
+            F.AM.Scale == OrigF.AM.Scale &&
+            LU.Kind) {
+          if (F.AM.BaseOffs == 0)
+            return &LU;
+          break;
+        }
+      }
+    }
+  }
+
+  return 0;
+}
+
 void LSRInstance::CollectInterestingTypesAndFactors() {
   SmallSetVector<const SCEV *, 4> Strides;
 
@@ -1867,6 +1993,8 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
         if (NV == LF.OperandValToReplace) {
           CI->setOperand(1, CI->getOperand(0));
           CI->setOperand(0, NV);
+          NV = CI->getOperand(1);
+          Changed = true;
         }
 
         // x == y  -->  x - y == 0
@@ -1901,6 +2029,9 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
   DEBUG(print_fixups(dbgs()));
 }
 
+/// InsertInitialFormula - Insert a formula for the given expression into
+/// the given use, separating out loop-variant portions from loop-invariant
+/// and loop-computable portions.
 void
 LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx) {
   Formula F;
@@ -1909,6 +2040,8 @@ LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx) {
   assert(Inserted && "Initial formula already exists!"); (void)Inserted;
 }
 
+/// InsertSupplementalFormula - Insert a simple single-register formula for
+/// the given expression into the given use.
 void
 LSRInstance::InsertSupplementalFormula(const SCEV *S,
                                        LSRUse &LU, size_t LUIdx) {
@@ -2265,8 +2398,7 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
 
 /// GenerateScales - Generate stride factor reuse formulae by making use of
 /// scaled-offset address modes, for example.
-void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx,
-                                 Formula Base) {
+void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
   // Determine the integer type for the base formula.
   const Type *IntTy = Base.getType();
   if (!IntTy) return;
@@ -2312,8 +2444,7 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx,
           // TODO: This could be optimized to avoid all the copying.
           Formula F = Base;
           F.ScaledReg = Quotient;
-          std::swap(F.BaseRegs[i], F.BaseRegs.back());
-          F.BaseRegs.pop_back();
+          F.DeleteBaseReg(F.BaseRegs[i]);
           (void)InsertFormula(LU, LUIdx, F);
         }
       }
@@ -2321,8 +2452,7 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx,
 }
 
 /// GenerateTruncates - Generate reuse formulae from different IV types.
-void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx,
-                                    Formula Base) {
+void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {
   // This requires TargetLowering to tell us which truncates are free.
   if (!TLI) return;
 
@@ -2479,7 +2609,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
 
     // TODO: Use a more targeted data structure.
     for (size_t L = 0, LE = LU.Formulae.size(); L != LE; ++L) {
-      Formula F = LU.Formulae[L];
+      const Formula &F = LU.Formulae[L];
       // Use the immediate in the scaled register.
       if (F.ScaledReg == OrigReg) {
         int64_t Offs = (uint64_t)F.AM.BaseOffs +
@@ -2527,10 +2657,11 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
                J = NewF.BaseRegs.begin(), JE = NewF.BaseRegs.end();
                J != JE; ++J)
             if (const SCEVConstant *C = dyn_cast<SCEVConstant>(*J))
-              if (C->getValue()->getValue().isNegative() !=
-                    (NewF.AM.BaseOffs < 0) &&
-                  C->getValue()->getValue().abs()
-                    .ule(abs64(NewF.AM.BaseOffs)))
+              if ((C->getValue()->getValue() + NewF.AM.BaseOffs).abs().slt(
+                   abs64(NewF.AM.BaseOffs)) &&
+                  (C->getValue()->getValue() +
+                   NewF.AM.BaseOffs).countTrailingZeros() >=
+                   CountTrailingZeros_64(NewF.AM.BaseOffs))
                 goto skip_formula;
 
           // Ok, looks good.
@@ -2579,7 +2710,7 @@ LSRInstance::GenerateAllReuseFormulae() {
 /// by other uses, pick the best one and delete the others.
 void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
 #ifndef NDEBUG
-  bool Changed = false;
+  bool ChangedFormulae = false;
 #endif
 
   // Collect the best formula for each unique set of shared registers. This
@@ -2591,10 +2722,9 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
   for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
     LSRUse &LU = Uses[LUIdx];
     FormulaSorter Sorter(L, LU, SE, DT);
+    DEBUG(dbgs() << "Filtering for use "; LU.print(dbgs()); dbgs() << '\n');
 
-    // Clear out the set of used regs; it will be recomputed.
-    LU.Regs.clear();
-
+    bool Any = false;
     for (size_t FIdx = 0, NumForms = LU.Formulae.size();
          FIdx != NumForms; ++FIdx) {
       Formula &F = LU.Formulae[FIdx];
@@ -2619,62 +2749,200 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
         Formula &Best = LU.Formulae[P.first->second];
         if (Sorter.operator()(F, Best))
           std::swap(F, Best);
-        DEBUG(dbgs() << "Filtering out "; F.print(dbgs());
+        DEBUG(dbgs() << "  Filtering out formula "; F.print(dbgs());
               dbgs() << "\n"
-                        "  in favor of "; Best.print(dbgs());
+                        "    in favor of formula "; Best.print(dbgs());
               dbgs() << '\n');
 #ifndef NDEBUG
-        Changed = true;
+        ChangedFormulae = true;
 #endif
-        std::swap(F, LU.Formulae.back());
-        LU.Formulae.pop_back();
+        LU.DeleteFormula(F);
         --FIdx;
         --NumForms;
+        Any = true;
         continue;
       }
-      if (F.ScaledReg) LU.Regs.insert(F.ScaledReg);
-      LU.Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
     }
+
+    // Now that we've filtered out some formulae, recompute the Regs set.
+    if (Any)
+      LU.RecomputeRegs(LUIdx, RegUses);
+
+    // Reset this to prepare for the next use.
     BestFormulae.clear();
   }
 
-  DEBUG(if (Changed) {
+  DEBUG(if (ChangedFormulae) {
           dbgs() << "\n"
                     "After filtering out undesirable candidates:\n";
           print_uses(dbgs());
         });
 }
 
+// This is a rough guess that seems to work fairly well.
+static const size_t ComplexityLimit = UINT16_MAX;
+
+/// EstimateSearchSpaceComplexity - Estimate the worst-case number of
+/// solutions the solver might have to consider. It almost never considers
+/// this many solutions because it prune the search space, but the pruning
+/// isn't always sufficient.
+size_t LSRInstance::EstimateSearchSpaceComplexity() const {
+  uint32_t Power = 1;
+  for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(),
+       E = Uses.end(); I != E; ++I) {
+    size_t FSize = I->Formulae.size();
+    if (FSize >= ComplexityLimit) {
+      Power = ComplexityLimit;
+      break;
+    }
+    Power *= FSize;
+    if (Power >= ComplexityLimit)
+      break;
+  }
+  return Power;
+}
+
 /// NarrowSearchSpaceUsingHeuristics - If there are an extraordinary number of
 /// formulae to choose from, use some rough heuristics to prune down the number
 /// of formulae. This keeps the main solver from taking an extraordinary amount
 /// of time in some worst-case scenarios.
 void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
-  // This is a rough guess that seems to work fairly well.
-  const size_t Limit = UINT16_MAX;
+  if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
+    DEBUG(dbgs() << "The search space is too complex.\n");
 
-  SmallPtrSet<const SCEV *, 4> Taken;
-  for (;;) {
-    // Estimate the worst-case number of solutions we might consider. We almost
-    // never consider this many solutions because we prune the search space,
-    // but the pruning isn't always sufficient.
-    uint32_t Power = 1;
-    for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(),
-         E = Uses.end(); I != E; ++I) {
-      size_t FSize = I->Formulae.size();
-      if (FSize >= Limit) {
-        Power = Limit;
-        break;
+    DEBUG(dbgs() << "Narrowing the search space by eliminating formulae "
+                    "which use a superset of registers used by other "
+                    "formulae.\n");
+
+    for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
+      LSRUse &LU = Uses[LUIdx];
+      bool Any = false;
+      for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
+        Formula &F = LU.Formulae[i];
+        // Look for a formula with a constant or GV in a register. If the use
+        // also has a formula with that same value in an immediate field,
+        // delete the one that uses a register.
+        for (SmallVectorImpl<const SCEV *>::const_iterator
+             I = F.BaseRegs.begin(), E = F.BaseRegs.end(); I != E; ++I) {
+          if (const SCEVConstant *C = dyn_cast<SCEVConstant>(*I)) {
+            Formula NewF = F;
+            NewF.AM.BaseOffs += C->getValue()->getSExtValue();
+            NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
+                                (I - F.BaseRegs.begin()));
+            if (LU.HasFormulaWithSameRegs(NewF)) {
+              DEBUG(dbgs() << "  Deleting "; F.print(dbgs()); dbgs() << '\n');
+              LU.DeleteFormula(F);
+              --i;
+              --e;
+              Any = true;
+              break;
+            }
+          } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(*I)) {
+            if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue()))
+              if (!F.AM.BaseGV) {
+                Formula NewF = F;
+                NewF.AM.BaseGV = GV;
+                NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
+                                    (I - F.BaseRegs.begin()));
+                if (LU.HasFormulaWithSameRegs(NewF)) {
+                  DEBUG(dbgs() << "  Deleting "; F.print(dbgs());
+                        dbgs() << '\n');
+                  LU.DeleteFormula(F);
+                  --i;
+                  --e;
+                  Any = true;
+                  break;
+                }
+              }
+          }
+        }
       }
-      Power *= FSize;
-      if (Power >= Limit)
-        break;
+      if (Any)
+        LU.RecomputeRegs(LUIdx, RegUses);
     }
-    if (Power < Limit)
-      break;
 
+    DEBUG(dbgs() << "After pre-selection:\n";
+          print_uses(dbgs()));
+  }
+
+  if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
+    DEBUG(dbgs() << "The search space is too complex.\n");
+
+    DEBUG(dbgs() << "Narrowing the search space by assuming that uses "
+                    "separated by a constant offset will use the same "
+                    "registers.\n");
+
+    // This is especially useful for unrolled loops.
+
+    for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
+      LSRUse &LU = Uses[LUIdx];
+      for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(),
+           E = LU.Formulae.end(); I != E; ++I) {
+        const Formula &F = *I;
+        if (F.AM.BaseOffs != 0 && F.AM.Scale == 0) {
+          if (LSRUse *LUThatHas = FindUseWithSimilarFormula(F, LU)) {
+            if (reconcileNewOffset(*LUThatHas, F.AM.BaseOffs,
+                                   /*HasBaseReg=*/false,
+                                   LU.Kind, LU.AccessTy)) {
+              DEBUG(dbgs() << "  Deleting use "; LU.print(dbgs());
+                    dbgs() << '\n');
+
+              LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop;
+
+              // Delete formulae from the new use which are no longer legal.
+              bool Any = false;
+              for (size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) {
+                Formula &F = LUThatHas->Formulae[i];
+                if (!isLegalUse(F.AM,
+                                LUThatHas->MinOffset, LUThatHas->MaxOffset,
+                                LUThatHas->Kind, LUThatHas->AccessTy, TLI)) {
+                  DEBUG(dbgs() << "  Deleting "; F.print(dbgs());
+                        dbgs() << '\n');
+                  LUThatHas->DeleteFormula(F);
+                  --i;
+                  --e;
+                  Any = true;
+                }
+              }
+              if (Any)
+                LUThatHas->RecomputeRegs(LUThatHas - &Uses.front(), RegUses);
+
+              // Update the relocs to reference the new use.
+              for (SmallVectorImpl<LSRFixup>::iterator I = Fixups.begin(),
+                   E = Fixups.end(); I != E; ++I) {
+                LSRFixup &Fixup = *I;
+                if (Fixup.LUIdx == LUIdx) {
+                  Fixup.LUIdx = LUThatHas - &Uses.front();
+                  Fixup.Offset += F.AM.BaseOffs;
+                  DEBUG(errs() << "New fixup has offset "
+                               << Fixup.Offset << '\n');
+                }
+                if (Fixup.LUIdx == NumUses-1)
+                  Fixup.LUIdx = LUIdx;
+              }
+
+              // Delete the old use.
+              DeleteUse(LU);
+              --LUIdx;
+              --NumUses;
+              break;
+            }
+          }
+        }
+      }
+    }
+
+    DEBUG(dbgs() << "After pre-selection:\n";
+          print_uses(dbgs()));
+  }
+
+  // With all other options exhausted, loop until the system is simple
+  // enough to handle.
+  SmallPtrSet<const SCEV *, 4> Taken;
+  while (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
     // Ok, we have too many of formulae on our hands to conveniently handle.
     // Use a rough heuristic to thin out the list.
+    DEBUG(dbgs() << "The search space is too complex.\n");
 
     // Pick the register which is used by the most LSRUses, which is likely
     // to be a good reuse register candidate.
@@ -2702,28 +2970,26 @@ void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
 
     // In any use with formulae which references this register, delete formulae
     // which don't reference it.
-    for (SmallVectorImpl<LSRUse>::iterator I = Uses.begin(),
-         E = Uses.end(); I != E; ++I) {
-      LSRUse &LU = *I;
+    for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
+      LSRUse &LU = Uses[LUIdx];
       if (!LU.Regs.count(Best)) continue;
 
-      // Clear out the set of used regs; it will be recomputed.
-      LU.Regs.clear();
-
+      bool Any = false;
       for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
         Formula &F = LU.Formulae[i];
         if (!F.referencesReg(Best)) {
           DEBUG(dbgs() << "  Deleting "; F.print(dbgs()); dbgs() << '\n');
-          std::swap(LU.Formulae.back(), F);
-          LU.Formulae.pop_back();
+          LU.DeleteFormula(F);
           --e;
           --i;
+          Any = true;
+          assert(e != 0 && "Use has no formulae left! Is Regs inconsistent?");
           continue;
         }
-
-        if (F.ScaledReg) LU.Regs.insert(F.ScaledReg);
-        LU.Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
       }
+
+      if (Any)
+        LU.RecomputeRegs(LUIdx, RegUses);
     }
 
     DEBUG(dbgs() << "After pre-selection:\n";
@@ -2810,11 +3076,14 @@ retry:
   // If none of the formulae had all of the required registers, relax the
   // constraint so that we don't exclude all formulae.
   if (!AnySatisfiedReqRegs) {
+    assert(!ReqRegs.empty() && "Solver failed even without required registers");
     ReqRegs.clear();
     goto retry;
   }
 }
 
+/// Solve - Choose one formula from each use. Return the results in the given
+/// Solution vector.
 void LSRInstance::Solve(SmallVectorImpl<const Formula *> &Solution) const {
   SmallVector<const Formula *, 8> Workspace;
   Cost SolutionCost;
@@ -2824,6 +3093,7 @@ void LSRInstance::Solve(SmallVectorImpl<const Formula *> &Solution) const {
   DenseSet<const SCEV *> VisitedRegs;
   Workspace.reserve(Uses.size());
 
+  // SolveRecurse does all the work.
   SolveRecurse(Solution, SolutionCost, Workspace, CurCost,
                CurRegs, VisitedRegs);
 
@@ -2839,17 +3109,8 @@ void LSRInstance::Solve(SmallVectorImpl<const Formula *> &Solution) const {
           Solution[i]->print(dbgs());
           dbgs() << '\n';
         });
-}
 
-/// getImmediateDominator - A handy utility for the specific DominatorTree
-/// query that we need here.
-///
-static BasicBlock *getImmediateDominator(BasicBlock *BB, DominatorTree &DT) {
-  DomTreeNode *Node = DT.getNode(BB);
-  if (!Node) return 0;
-  Node = Node->getIDom();
-  if (!Node) return 0;
-  return Node->getBlock();
+  assert(Solution.size() == Uses.size() && "Malformed solution!");
 }
 
 /// HoistInsertPosition - Helper for AdjustInsertPositionForExpand. Climb up
@@ -2865,9 +3126,11 @@ LSRInstance::HoistInsertPosition(BasicBlock::iterator IP,
     unsigned IPLoopDepth = IPLoop ? IPLoop->getLoopDepth() : 0;
 
     BasicBlock *IDom;
-    for (BasicBlock *Rung = IP->getParent(); ; Rung = IDom) {
-      IDom = getImmediateDominator(Rung, DT);
-      if (!IDom) return IP;
+    for (DomTreeNode *Rung = DT.getNode(IP->getParent()); ; ) {
+      if (!Rung) return IP;
+      Rung = Rung->getIDom();
+      if (!Rung) return IP;
+      IDom = Rung->getBlock();
 
       // Don't climb into a loop though.
       const Loop *IDomLoop = LI.getLoopFor(IDom);
@@ -2891,7 +3154,7 @@ LSRInstance::HoistInsertPosition(BasicBlock::iterator IP,
       // instead of at the end, so that it can be used for other expansions.
       if (IDom == Inst->getParent() &&
           (!BetterPos || DT.dominates(BetterPos, Inst)))
-        BetterPos = next(BasicBlock::iterator(Inst));
+        BetterPos = llvm::next(BasicBlock::iterator(Inst));
     }
     if (!AllDominate)
       break;
@@ -2957,6 +3220,8 @@ LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator IP,
   return IP;
 }
 
+/// Expand - Emit instructions for the leading candidate expression for this
+/// LSRUse (this is called "expanding").
 Value *LSRInstance::Expand(const LSRFixup &LF,
                            const Formula &F,
                            BasicBlock::iterator IP,
@@ -3212,6 +3477,8 @@ void LSRInstance::Rewrite(const LSRFixup &LF,
   DeadInsts.push_back(LF.OperandValToReplace);
 }
 
+/// ImplementSolution - Rewrite all the fixup locations with new values,
+/// following the chosen solution.
 void
 LSRInstance::ImplementSolution(const SmallVectorImpl<const Formula *> &Solution,
                                Pass *P) {
@@ -3224,10 +3491,11 @@ LSRInstance::ImplementSolution(const SmallVectorImpl<const Formula *> &Solution,
   Rewriter.setIVIncInsertPos(L, IVIncInsertPos);
 
   // Expand the new value definitions and update the users.
-  for (size_t i = 0, e = Fixups.size(); i != e; ++i) {
-    size_t LUIdx = Fixups[i].LUIdx;
+  for (SmallVectorImpl<LSRFixup>::const_iterator I = Fixups.begin(),
+       E = Fixups.end(); I != E; ++I) {
+    const LSRFixup &Fixup = *I;
 
-    Rewrite(Fixups[i], *Solution[LUIdx], Rewriter, DeadInsts, P);
+    Rewrite(Fixup, *Solution[Fixup.LUIdx], Rewriter, DeadInsts, P);
 
     Changed = true;
   }
@@ -3256,13 +3524,11 @@ LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P)
         WriteAsOperand(dbgs(), L->getHeader(), /*PrintType=*/false);
         dbgs() << ":\n");
 
-  /// OptimizeShadowIV - If IV is used in a int-to-float cast
-  /// inside the loop then try to eliminate the cast operation.
+  // First, perform some low-level loop optimizations.
   OptimizeShadowIV();
+  OptimizeLoopTermCond();
 
-  // Change loop terminating condition to use the postinc iv when possible.
-  Changed |= OptimizeLoopTermCond();
-
+  // Start collecting data and preparing for the solver.
   CollectInterestingTypesAndFactors();
   CollectFixupsAndInitialFormulae();
   CollectLoopInvariantFixupsAndFormulae();
@@ -3283,7 +3549,6 @@ LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P)
 
   SmallVector<const Formula *, 8> Solution;
   Solve(Solution);
-  assert(Solution.size() == Uses.size() && "Malformed solution!");
 
   // Release memory that is no longer needed.
   Factors.clear();
@@ -3333,9 +3598,8 @@ void LSRInstance::print_fixups(raw_ostream &OS) const {
   OS << "LSR is examining the following fixup sites:\n";
   for (SmallVectorImpl<LSRFixup>::const_iterator I = Fixups.begin(),
        E = Fixups.end(); I != E; ++I) {
-    const LSRFixup &LF = *I;
     dbgs() << "  ";
-    LF.print(OS);
+    I->print(OS);
     OS << '\n';
   }
 }
diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index b621e8d..9744100 100644
--- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -58,13 +58,20 @@ FunctionPass *llvm::createCFGSimplificationPass() {
 
 /// ChangeToUnreachable - Insert an unreachable instruction before the specified
 /// instruction, making it and the rest of the code in the block dead.
-static void ChangeToUnreachable(Instruction *I) {
+static void ChangeToUnreachable(Instruction *I, bool UseLLVMTrap) {
   BasicBlock *BB = I->getParent();
   // Loop over all of the successors, removing BB's entry from any PHI
   // nodes.
   for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
     (*SI)->removePredecessor(BB);
   
+  // Insert a call to llvm.trap right before this.  This turns the undefined
+  // behavior into a hard fail instead of falling through into random code.
+  if (UseLLVMTrap) {
+    Function *TrapFn =
+      Intrinsic::getDeclaration(BB->getParent()->getParent(), Intrinsic::trap);
+    CallInst::Create(TrapFn, "", I);
+  }
   new UnreachableInst(I->getContext(), I);
   
   // All instructions after this are dead.
@@ -118,7 +125,8 @@ static bool MarkAliveBlocks(BasicBlock *BB,
           // though.
           ++BBI;
           if (!isa<UnreachableInst>(BBI)) {
-            ChangeToUnreachable(BBI);
+            // Don't insert a call to llvm.trap right before the unreachable.
+            ChangeToUnreachable(BBI, false);
             Changed = true;
           }
           break;
@@ -134,7 +142,7 @@ static bool MarkAliveBlocks(BasicBlock *BB,
         if (isa<UndefValue>(Ptr) ||
             (isa<ConstantPointerNull>(Ptr) &&
              SI->getPointerAddressSpace() == 0)) {
-          ChangeToUnreachable(SI);
+          ChangeToUnreachable(SI, true);
           Changed = true;
           break;
         }
diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
index b053cfc..7414be7 100644
--- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -558,10 +558,13 @@ struct MemCmpOpt : public LibCallOptimization {
     if (Len == 0) // memcmp(s1,s2,0) -> 0
       return Constant::getNullValue(CI->getType());
 
-    if (Len == 1) { // memcmp(S1,S2,1) -> *LHS - *RHS
-      Value *LHSV = B.CreateLoad(CastToCStr(LHS, B), "lhsv");
-      Value *RHSV = B.CreateLoad(CastToCStr(RHS, B), "rhsv");
-      return B.CreateSExt(B.CreateSub(LHSV, RHSV, "chardiff"), CI->getType());
+    // memcmp(S1,S2,1) -> *(unsigned char*)LHS - *(unsigned char*)RHS
+    if (Len == 1) {
+      Value *LHSV = B.CreateZExt(B.CreateLoad(CastToCStr(LHS, B), "lhsc"),
+                                 CI->getType(), "lhsv");
+      Value *RHSV = B.CreateZExt(B.CreateLoad(CastToCStr(RHS, B), "rhsc"),
+                                 CI->getType(), "rhsv");
+      return B.CreateSub(LHSV, RHSV, "chardiff");
     }
 
     // Constant folding: memcmp(x, y, l) -> cnst (all arguments are constant)
diff --git a/lib/Transforms/Scalar/Sink.cpp b/lib/Transforms/Scalar/Sink.cpp
new file mode 100644
index 0000000..b88ba48
--- /dev/null
+++ b/lib/Transforms/Scalar/Sink.cpp
@@ -0,0 +1,267 @@
+//===-- Sink.cpp - Code Sinking -------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass moves instructions into successor blocks, when possible, so that
+// they aren't executed on paths where their results aren't needed.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "sink"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+STATISTIC(NumSunk, "Number of instructions sunk");
+
+namespace {
+  class Sinking : public FunctionPass {
+    DominatorTree *DT;
+    LoopInfo *LI;
+    AliasAnalysis *AA;
+
+  public:
+    static char ID; // Pass identification
+    Sinking() : FunctionPass(&ID) {}
+    
+    virtual bool runOnFunction(Function &F);
+    
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesCFG();
+      FunctionPass::getAnalysisUsage(AU);
+      AU.addRequired<AliasAnalysis>();
+      AU.addRequired<DominatorTree>();
+      AU.addRequired<LoopInfo>();
+      AU.addPreserved<DominatorTree>();
+      AU.addPreserved<LoopInfo>();
+    }
+  private:
+    bool ProcessBlock(BasicBlock &BB);
+    bool SinkInstruction(Instruction *I, SmallPtrSet<Instruction *, 8> &Stores);
+    bool AllUsesDominatedByBlock(Instruction *Inst, BasicBlock *BB) const;
+  };
+} // end anonymous namespace
+  
+char Sinking::ID = 0;
+static RegisterPass<Sinking>
+X("sink", "Code sinking");
+
+FunctionPass *llvm::createSinkingPass() { return new Sinking(); }
+
+/// AllUsesDominatedByBlock - Return true if all uses of the specified value
+/// occur in blocks dominated by the specified block.
+bool Sinking::AllUsesDominatedByBlock(Instruction *Inst, 
+                                      BasicBlock *BB) const {
+  // Ignoring debug uses is necessary so debug info doesn't affect the code.
+  // This may leave a referencing dbg_value in the original block, before
+  // the definition of the vreg.  Dwarf generator handles this although the
+  // user might not get the right info at runtime.
+  for (Value::use_iterator I = Inst->use_begin(),
+       E = Inst->use_end(); I != E; ++I) {
+    // Determine the block of the use.
+    Instruction *UseInst = cast<Instruction>(*I);
+    BasicBlock *UseBlock = UseInst->getParent();
+    if (PHINode *PN = dyn_cast<PHINode>(UseInst)) {
+      // PHI nodes use the operand in the predecessor block, not the block with
+      // the PHI.
+      unsigned Num = PHINode::getIncomingValueNumForOperand(I.getOperandNo());
+      UseBlock = PN->getIncomingBlock(Num);
+    }
+    // Check that it dominates.
+    if (!DT->dominates(BB, UseBlock))
+      return false;
+  }
+  return true;
+}
+
+bool Sinking::runOnFunction(Function &F) {
+  DT = &getAnalysis<DominatorTree>();
+  LI = &getAnalysis<LoopInfo>();
+  AA = &getAnalysis<AliasAnalysis>();
+
+  bool EverMadeChange = false;
+  
+  while (1) {
+    bool MadeChange = false;
+
+    // Process all basic blocks.
+    for (Function::iterator I = F.begin(), E = F.end(); 
+         I != E; ++I)
+      MadeChange |= ProcessBlock(*I);
+    
+    // If this iteration over the code changed anything, keep iterating.
+    if (!MadeChange) break;
+    EverMadeChange = true;
+  } 
+  return EverMadeChange;
+}
+
+bool Sinking::ProcessBlock(BasicBlock &BB) {
+  // Can't sink anything out of a block that has less than two successors.
+  if (BB.getTerminator()->getNumSuccessors() <= 1 || BB.empty()) return false;
+
+  // Don't bother sinking code out of unreachable blocks. In addition to being
+  // unprofitable, it can also lead to infinite looping, because in an unreachable
+  // loop there may be nowhere to stop.
+  if (!DT->isReachableFromEntry(&BB)) return false;
+
+  bool MadeChange = false;
+
+  // Walk the basic block bottom-up.  Remember if we saw a store.
+  BasicBlock::iterator I = BB.end();
+  --I;
+  bool ProcessedBegin = false;
+  SmallPtrSet<Instruction *, 8> Stores;
+  do {
+    Instruction *Inst = I;  // The instruction to sink.
+    
+    // Predecrement I (if it's not begin) so that it isn't invalidated by
+    // sinking.
+    ProcessedBegin = I == BB.begin();
+    if (!ProcessedBegin)
+      --I;
+
+    if (isa<DbgInfoIntrinsic>(Inst))
+      continue;
+
+    if (SinkInstruction(Inst, Stores))
+      ++NumSunk, MadeChange = true;
+    
+    // If we just processed the first instruction in the block, we're done.
+  } while (!ProcessedBegin);
+  
+  return MadeChange;
+}
+
+static bool isSafeToMove(Instruction *Inst, AliasAnalysis *AA,
+                         SmallPtrSet<Instruction *, 8> &Stores) {
+  if (LoadInst *L = dyn_cast<LoadInst>(Inst)) {
+    if (L->isVolatile()) return false;
+
+    Value *Ptr = L->getPointerOperand();
+    unsigned Size = AA->getTypeStoreSize(L->getType());
+    for (SmallPtrSet<Instruction *, 8>::iterator I = Stores.begin(),
+         E = Stores.end(); I != E; ++I)
+      if (AA->getModRefInfo(*I, Ptr, Size) & AliasAnalysis::Mod)
+        return false;
+  }
+
+  if (Inst->mayWriteToMemory()) {
+    Stores.insert(Inst);
+    return false;
+  }
+
+  return Inst->isSafeToSpeculativelyExecute();
+}
+
+/// SinkInstruction - Determine whether it is safe to sink the specified machine
+/// instruction out of its current block into a successor.
+bool Sinking::SinkInstruction(Instruction *Inst,
+                              SmallPtrSet<Instruction *, 8> &Stores) {
+  // Check if it's safe to move the instruction.
+  if (!isSafeToMove(Inst, AA, Stores))
+    return false;
+  
+  // FIXME: This should include support for sinking instructions within the
+  // block they are currently in to shorten the live ranges.  We often get
+  // instructions sunk into the top of a large block, but it would be better to
+  // also sink them down before their first use in the block.  This xform has to
+  // be careful not to *increase* register pressure though, e.g. sinking
+  // "x = y + z" down if it kills y and z would increase the live ranges of y
+  // and z and only shrink the live range of x.
+  
+  // Loop over all the operands of the specified instruction.  If there is
+  // anything we can't handle, bail out.
+  BasicBlock *ParentBlock = Inst->getParent();
+  
+  // SuccToSinkTo - This is the successor to sink this instruction to, once we
+  // decide.
+  BasicBlock *SuccToSinkTo = 0;
+  
+  // FIXME: This picks a successor to sink into based on having one
+  // successor that dominates all the uses.  However, there are cases where
+  // sinking can happen but where the sink point isn't a successor.  For
+  // example:
+  //   x = computation
+  //   if () {} else {}
+  //   use x
+  // the instruction could be sunk over the whole diamond for the 
+  // if/then/else (or loop, etc), allowing it to be sunk into other blocks
+  // after that.
+  
+  // Instructions can only be sunk if all their uses are in blocks
+  // dominated by one of the successors.
+  // Look at all the successors and decide which one
+  // we should sink to.
+  for (succ_iterator SI = succ_begin(ParentBlock),
+       E = succ_end(ParentBlock); SI != E; ++SI) {
+    if (AllUsesDominatedByBlock(Inst, *SI)) {
+      SuccToSinkTo = *SI;
+      break;
+    }
+  }
+      
+  // If we couldn't find a block to sink to, ignore this instruction.
+  if (SuccToSinkTo == 0)
+    return false;
+  
+  // It is not possible to sink an instruction into its own block.  This can
+  // happen with loops.
+  if (Inst->getParent() == SuccToSinkTo)
+    return false;
+  
+  DEBUG(dbgs() << "Sink instr " << *Inst);
+  DEBUG(dbgs() << "to block ";
+        WriteAsOperand(dbgs(), SuccToSinkTo, false));
+  
+  // If the block has multiple predecessors, this would introduce computation on
+  // a path that it doesn't already exist.  We could split the critical edge,
+  // but for now we just punt.
+  // FIXME: Split critical edges if not backedges.
+  if (SuccToSinkTo->getUniquePredecessor() != ParentBlock) {
+    // We cannot sink a load across a critical edge - there may be stores in
+    // other code paths.
+    if (!Inst->isSafeToSpeculativelyExecute()) {
+      DEBUG(dbgs() << " *** PUNTING: Wont sink load along critical edge.\n");
+      return false;
+    }
+
+    // We don't want to sink across a critical edge if we don't dominate the
+    // successor. We could be introducing calculations to new code paths.
+    if (!DT->dominates(ParentBlock, SuccToSinkTo)) {
+      DEBUG(dbgs() << " *** PUNTING: Critical edge found\n");
+      return false;
+    }
+
+    // Don't sink instructions into a loop.
+    if (LI->isLoopHeader(SuccToSinkTo)) {
+      DEBUG(dbgs() << " *** PUNTING: Loop header found\n");
+      return false;
+    }
+
+    // Otherwise we are OK with sinking along a critical edge.
+    DEBUG(dbgs() << "Sinking along critical edge.\n");
+  }
+  
+  // Determine where to insert into.  Skip phi nodes.
+  BasicBlock::iterator InsertPos = SuccToSinkTo->begin();
+  while (InsertPos != SuccToSinkTo->end() && isa<PHINode>(InsertPos))
+    ++InsertPos;
+  
+  // Move the instruction.
+  Inst->moveBefore(InsertPos);
+  return true;
+}
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index 8ad66dd..6d4fe4b 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -344,7 +344,7 @@ static MDNode *UpdateInlinedAtInfo(MDNode *InsnMD, MDNode *TheCallMD) {
   DILocation OrigLocation = ILoc.getOrigLocation();
   MDNode *NewLoc = TheCallMD;
   if (OrigLocation.Verify())
-    NewLoc = UpdateInlinedAtInfo(OrigLocation.getNode(), TheCallMD);
+    NewLoc = UpdateInlinedAtInfo(OrigLocation, TheCallMD);
 
   Value *MDVs[] = {
     InsnMD->getOperand(0), // Line
diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index f181f3a..13f0a28 100644
--- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -861,7 +861,7 @@ void PromoteMem2Reg::PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info,
     // Find the nearest store that has a lower than this load. 
     StoresByIndexTy::iterator I = 
       std::lower_bound(StoresByIndex.begin(), StoresByIndex.end(),
-                       std::pair<unsigned, StoreInst*>(LoadIdx, 0),
+                       std::pair<unsigned, StoreInst*>(LoadIdx, static_cast<StoreInst*>(0)),
                        StoreIndexSearchPredicate());
     
     // If there is no store before this load, then we can't promote this load.
@@ -886,7 +886,7 @@ void PromoteMem2Reg::PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info,
 void PromoteMem2Reg::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
                                                      StoreInst *SI) {
   DIVariable DIVar(DDI->getVariable());
-  if (!DIVar.getNode())
+  if (!DIVar.Verify())
     return;
 
   if (!DIF)
diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp
index 25d50db..f4bdb527 100644
--- a/lib/Transforms/Utils/SSAUpdater.cpp
+++ b/lib/Transforms/Utils/SSAUpdater.cpp
@@ -12,7 +12,6 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "ssaupdater"
-#include "llvm/Transforms/Utils/SSAUpdater.h"
 #include "llvm/Instructions.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/Support/AlignOf.h"
@@ -20,40 +19,17 @@
 #include "llvm/Support/CFG.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include "llvm/Transforms/Utils/SSAUpdaterImpl.h"
 using namespace llvm;
 
-/// BBInfo - Per-basic block information used internally by SSAUpdater.
-/// The predecessors of each block are cached here since pred_iterator is
-/// slow and we need to iterate over the blocks at least a few times.
-class SSAUpdater::BBInfo {
-public:
-  BasicBlock *BB;      // Back-pointer to the corresponding block.
-  Value *AvailableVal; // Value to use in this block.
-  BBInfo *DefBB;       // Block that defines the available value.
-  int BlkNum;          // Postorder number.
-  BBInfo *IDom;        // Immediate dominator.
-  unsigned NumPreds;   // Number of predecessor blocks.
-  BBInfo **Preds;      // Array[NumPreds] of predecessor blocks.
-  PHINode *PHITag;     // Marker for existing PHIs that match.
-
-  BBInfo(BasicBlock *ThisBB, Value *V)
-    : BB(ThisBB), AvailableVal(V), DefBB(V ? this : 0), BlkNum(0), IDom(0),
-      NumPreds(0), Preds(0), PHITag(0) { }
-};
-
-typedef DenseMap<BasicBlock*, SSAUpdater::BBInfo*> BBMapTy;
-
 typedef DenseMap<BasicBlock*, Value*> AvailableValsTy;
 static AvailableValsTy &getAvailableVals(void *AV) {
   return *static_cast<AvailableValsTy*>(AV);
 }
 
-static BBMapTy *getBBMap(void *BM) {
-  return static_cast<BBMapTy*>(BM);
-}
-
 SSAUpdater::SSAUpdater(SmallVectorImpl<PHINode*> *NewPHI)
-  : AV(0), PrototypeValue(0), BM(0), InsertedPHIs(NewPHI) {}
+  : AV(0), PrototypeValue(0), InsertedPHIs(NewPHI) {}
 
 SSAUpdater::~SSAUpdater() {
   delete &getAvailableVals(AV);
@@ -105,9 +81,7 @@ static bool IsEquivalentPHI(PHINode *PHI,
 /// GetValueAtEndOfBlock - Construct SSA form, materializing a value that is
 /// live at the end of the specified block.
 Value *SSAUpdater::GetValueAtEndOfBlock(BasicBlock *BB) {
-  assert(BM == 0 && "Unexpected Internal State");
   Value *Res = GetValueAtEndOfBlockInternal(BB);
-  assert(BM == 0 && "Unexpected Internal State");
   return Res;
 }
 
@@ -231,427 +205,126 @@ void SSAUpdater::RewriteUse(Use &U) {
   U.set(V);
 }
 
-/// GetValueAtEndOfBlockInternal - Check to see if AvailableVals has an entry
-/// for the specified BB and if so, return it.  If not, construct SSA form by
-/// first calculating the required placement of PHIs and then inserting new
-/// PHIs where needed.
-Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) {
-  AvailableValsTy &AvailableVals = getAvailableVals(AV);
-  if (Value *V = AvailableVals[BB])
-    return V;
-
-  // Pool allocation used internally by GetValueAtEndOfBlock.
-  BumpPtrAllocator Allocator;
-  BBMapTy BBMapObj;
-  BM = &BBMapObj;
-
-  SmallVector<BBInfo*, 100> BlockList;
-  BuildBlockList(BB, &BlockList, &Allocator);
-
-  // Special case: bail out if BB is unreachable.
-  if (BlockList.size() == 0) {
-    BM = 0;
-    return UndefValue::get(PrototypeValue->getType());
-  }
-
-  FindDominators(&BlockList);
-  FindPHIPlacement(&BlockList);
-  FindAvailableVals(&BlockList);
-
-  BM = 0;
-  return BBMapObj[BB]->DefBB->AvailableVal;
+/// PHIiter - Iterator for PHI operands.  This is used for the PHI_iterator
+/// in the SSAUpdaterImpl template.
+namespace {
+  class PHIiter {
+  private:
+    PHINode *PHI;
+    unsigned idx;
+
+  public:
+    explicit PHIiter(PHINode *P) // begin iterator
+      : PHI(P), idx(0) {}
+    PHIiter(PHINode *P, bool) // end iterator
+      : PHI(P), idx(PHI->getNumIncomingValues()) {}
+
+    PHIiter &operator++() { ++idx; return *this; } 
+    bool operator==(const PHIiter& x) const { return idx == x.idx; }
+    bool operator!=(const PHIiter& x) const { return !operator==(x); }
+    Value *getIncomingValue() { return PHI->getIncomingValue(idx); }
+    BasicBlock *getIncomingBlock() { return PHI->getIncomingBlock(idx); }
+  };
 }
 
-/// FindPredecessorBlocks - Put the predecessors of Info->BB into the Preds
-/// vector, set Info->NumPreds, and allocate space in Info->Preds.
-static void FindPredecessorBlocks(SSAUpdater::BBInfo *Info,
-                                  SmallVectorImpl<BasicBlock*> *Preds,
-                                  BumpPtrAllocator *Allocator) {
-  // We can get our predecessor info by walking the pred_iterator list,
-  // but it is relatively slow.  If we already have PHI nodes in this
-  // block, walk one of them to get the predecessor list instead.
-  BasicBlock *BB = Info->BB;
-  if (PHINode *SomePhi = dyn_cast<PHINode>(BB->begin())) {
-    for (unsigned PI = 0, E = SomePhi->getNumIncomingValues(); PI != E; ++PI)
-      Preds->push_back(SomePhi->getIncomingBlock(PI));
-  } else {
-    for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
-      Preds->push_back(*PI);
+/// SSAUpdaterTraits<SSAUpdater> - Traits for the SSAUpdaterImpl template,
+/// specialized for SSAUpdater.
+namespace llvm {
+template<>
+class SSAUpdaterTraits<SSAUpdater> {
+public:
+  typedef BasicBlock BlkT;
+  typedef Value *ValT;
+  typedef PHINode PhiT;
+
+  typedef succ_iterator BlkSucc_iterator;
+  static BlkSucc_iterator BlkSucc_begin(BlkT *BB) { return succ_begin(BB); }
+  static BlkSucc_iterator BlkSucc_end(BlkT *BB) { return succ_end(BB); }
+
+  typedef PHIiter PHI_iterator;
+  static inline PHI_iterator PHI_begin(PhiT *PHI) { return PHI_iterator(PHI); }
+  static inline PHI_iterator PHI_end(PhiT *PHI) {
+    return PHI_iterator(PHI, true);
   }
 
-  Info->NumPreds = Preds->size();
-  Info->Preds = static_cast<SSAUpdater::BBInfo**>
-    (Allocator->Allocate(Info->NumPreds * sizeof(SSAUpdater::BBInfo*),
-                         AlignOf<SSAUpdater::BBInfo*>::Alignment));
-}
-
-/// BuildBlockList - Starting from the specified basic block, traverse back
-/// through its predecessors until reaching blocks with known values.  Create
-/// BBInfo structures for the blocks and append them to the block list.
-void SSAUpdater::BuildBlockList(BasicBlock *BB, BlockListTy *BlockList,
-                                BumpPtrAllocator *Allocator) {
-  AvailableValsTy &AvailableVals = getAvailableVals(AV);
-  BBMapTy *BBMap = getBBMap(BM);
-  SmallVector<BBInfo*, 10> RootList;
-  SmallVector<BBInfo*, 64> WorkList;
-
-  BBInfo *Info = new (*Allocator) BBInfo(BB, 0);
-  (*BBMap)[BB] = Info;
-  WorkList.push_back(Info);
-
-  // Search backward from BB, creating BBInfos along the way and stopping when
-  // reaching blocks that define the value.  Record those defining blocks on
-  // the RootList.
-  SmallVector<BasicBlock*, 10> Preds;
-  while (!WorkList.empty()) {
-    Info = WorkList.pop_back_val();
-    Preds.clear();
-    FindPredecessorBlocks(Info, &Preds, Allocator);
-
-    // Treat an unreachable predecessor as a definition with 'undef'.
-    if (Info->NumPreds == 0) {
-      Info->AvailableVal = UndefValue::get(PrototypeValue->getType());
-      Info->DefBB = Info;
-      RootList.push_back(Info);
-      continue;
-    }
-
-    for (unsigned p = 0; p != Info->NumPreds; ++p) {
-      BasicBlock *Pred = Preds[p];
-      // Check if BBMap already has a BBInfo for the predecessor block.
-      BBMapTy::value_type &BBMapBucket = BBMap->FindAndConstruct(Pred);
-      if (BBMapBucket.second) {
-        Info->Preds[p] = BBMapBucket.second;
-        continue;
-      }
-
-      // Create a new BBInfo for the predecessor.
-      Value *PredVal = AvailableVals.lookup(Pred);
-      BBInfo *PredInfo = new (*Allocator) BBInfo(Pred, PredVal);
-      BBMapBucket.second = PredInfo;
-      Info->Preds[p] = PredInfo;
-
-      if (PredInfo->AvailableVal) {
-        RootList.push_back(PredInfo);
-        continue;
-      }
-      WorkList.push_back(PredInfo);
+  /// FindPredecessorBlocks - Put the predecessors of Info->BB into the Preds
+  /// vector, set Info->NumPreds, and allocate space in Info->Preds.
+  static void FindPredecessorBlocks(BasicBlock *BB,
+                                    SmallVectorImpl<BasicBlock*> *Preds) {
+    // We can get our predecessor info by walking the pred_iterator list,
+    // but it is relatively slow.  If we already have PHI nodes in this
+    // block, walk one of them to get the predecessor list instead.
+    if (PHINode *SomePhi = dyn_cast<PHINode>(BB->begin())) {
+      for (unsigned PI = 0, E = SomePhi->getNumIncomingValues(); PI != E; ++PI)
+        Preds->push_back(SomePhi->getIncomingBlock(PI));
+    } else {
+      for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
+        Preds->push_back(*PI);
     }
   }
 
-  // Now that we know what blocks are backwards-reachable from the starting
-  // block, do a forward depth-first traversal to assign postorder numbers
-  // to those blocks.
-  BBInfo *PseudoEntry = new (*Allocator) BBInfo(0, 0);
-  unsigned BlkNum = 1;
-
-  // Initialize the worklist with the roots from the backward traversal.
-  while (!RootList.empty()) {
-    Info = RootList.pop_back_val();
-    Info->IDom = PseudoEntry;
-    Info->BlkNum = -1;
-    WorkList.push_back(Info);
+  /// GetUndefVal - Get an undefined value of the same type as the value
+  /// being handled.
+  static Value *GetUndefVal(BasicBlock *BB, SSAUpdater *Updater) {
+    return UndefValue::get(Updater->PrototypeValue->getType());
   }
 
-  while (!WorkList.empty()) {
-    Info = WorkList.back();
-
-    if (Info->BlkNum == -2) {
-      // All the successors have been handled; assign the postorder number.
-      Info->BlkNum = BlkNum++;
-      // If not a root, put it on the BlockList.
-      if (!Info->AvailableVal)
-        BlockList->push_back(Info);
-      WorkList.pop_back();
-      continue;
-    }
-
-    // Leave this entry on the worklist, but set its BlkNum to mark that its
-    // successors have been put on the worklist.  When it returns to the top
-    // the list, after handling its successors, it will be assigned a number.
-    Info->BlkNum = -2;
-
-    // Add unvisited successors to the work list.
-    for (succ_iterator SI = succ_begin(Info->BB), E = succ_end(Info->BB);
-         SI != E; ++SI) {
-      BBInfo *SuccInfo = (*BBMap)[*SI];
-      if (!SuccInfo || SuccInfo->BlkNum)
-        continue;
-      SuccInfo->BlkNum = -1;
-      WorkList.push_back(SuccInfo);
-    }
+  /// CreateEmptyPHI - Create a new PHI instruction in the specified block.
+  /// Reserve space for the operands but do not fill them in yet.
+  static Value *CreateEmptyPHI(BasicBlock *BB, unsigned NumPreds,
+                               SSAUpdater *Updater) {
+    PHINode *PHI = PHINode::Create(Updater->PrototypeValue->getType(),
+                                   Updater->PrototypeValue->getName(),
+                                   &BB->front());
+    PHI->reserveOperandSpace(NumPreds);
+    return PHI;
   }
-  PseudoEntry->BlkNum = BlkNum;
-}
 
-/// IntersectDominators - This is the dataflow lattice "meet" operation for
-/// finding dominators.  Given two basic blocks, it walks up the dominator
-/// tree until it finds a common dominator of both.  It uses the postorder
-/// number of the blocks to determine how to do that.
-static SSAUpdater::BBInfo *IntersectDominators(SSAUpdater::BBInfo *Blk1,
-                                               SSAUpdater::BBInfo *Blk2) {
-  while (Blk1 != Blk2) {
-    while (Blk1->BlkNum < Blk2->BlkNum) {
-      Blk1 = Blk1->IDom;
-      if (!Blk1)
-        return Blk2;
-    }
-    while (Blk2->BlkNum < Blk1->BlkNum) {
-      Blk2 = Blk2->IDom;
-      if (!Blk2)
-        return Blk1;
-    }
+  /// AddPHIOperand - Add the specified value as an operand of the PHI for
+  /// the specified predecessor block.
+  static void AddPHIOperand(PHINode *PHI, Value *Val, BasicBlock *Pred) {
+    PHI->addIncoming(Val, Pred);
   }
-  return Blk1;
-}
 
-/// FindDominators - Calculate the dominator tree for the subset of the CFG
-/// corresponding to the basic blocks on the BlockList.  This uses the
-/// algorithm from: "A Simple, Fast Dominance Algorithm" by Cooper, Harvey and
-/// Kennedy, published in Software--Practice and Experience, 2001, 4:1-10.
-/// Because the CFG subset does not include any edges leading into blocks that
-/// define the value, the results are not the usual dominator tree.  The CFG
-/// subset has a single pseudo-entry node with edges to a set of root nodes
-/// for blocks that define the value.  The dominators for this subset CFG are
-/// not the standard dominators but they are adequate for placing PHIs within
-/// the subset CFG.
-void SSAUpdater::FindDominators(BlockListTy *BlockList) {
-  bool Changed;
-  do {
-    Changed = false;
-    // Iterate over the list in reverse order, i.e., forward on CFG edges.
-    for (BlockListTy::reverse_iterator I = BlockList->rbegin(),
-           E = BlockList->rend(); I != E; ++I) {
-      BBInfo *Info = *I;
-
-      // Start with the first predecessor.
-      assert(Info->NumPreds > 0 && "unreachable block");
-      BBInfo *NewIDom = Info->Preds[0];
-
-      // Iterate through the block's other predecessors.
-      for (unsigned p = 1; p != Info->NumPreds; ++p) {
-        BBInfo *Pred = Info->Preds[p];
-        NewIDom = IntersectDominators(NewIDom, Pred);
-      }
-
-      // Check if the IDom value has changed.
-      if (NewIDom != Info->IDom) {
-        Info->IDom = NewIDom;
-        Changed = true;
-      }
-    }
-  } while (Changed);
-}
-
-/// IsDefInDomFrontier - Search up the dominator tree from Pred to IDom for
-/// any blocks containing definitions of the value.  If one is found, then the
-/// successor of Pred is in the dominance frontier for the definition, and
-/// this function returns true.
-static bool IsDefInDomFrontier(const SSAUpdater::BBInfo *Pred,
-                               const SSAUpdater::BBInfo *IDom) {
-  for (; Pred != IDom; Pred = Pred->IDom) {
-    if (Pred->DefBB == Pred)
-      return true;
+  /// InstrIsPHI - Check if an instruction is a PHI.
+  ///
+  static PHINode *InstrIsPHI(Instruction *I) {
+    return dyn_cast<PHINode>(I);
   }
-  return false;
-}
-
-/// FindPHIPlacement - PHIs are needed in the iterated dominance frontiers of
-/// the known definitions.  Iteratively add PHIs in the dom frontiers until
-/// nothing changes.  Along the way, keep track of the nearest dominating
-/// definitions for non-PHI blocks.
-void SSAUpdater::FindPHIPlacement(BlockListTy *BlockList) {
-  bool Changed;
-  do {
-    Changed = false;
-    // Iterate over the list in reverse order, i.e., forward on CFG edges.
-    for (BlockListTy::reverse_iterator I = BlockList->rbegin(),
-           E = BlockList->rend(); I != E; ++I) {
-      BBInfo *Info = *I;
-
-      // If this block already needs a PHI, there is nothing to do here.
-      if (Info->DefBB == Info)
-        continue;
-
-      // Default to use the same def as the immediate dominator.
-      BBInfo *NewDefBB = Info->IDom->DefBB;
-      for (unsigned p = 0; p != Info->NumPreds; ++p) {
-        if (IsDefInDomFrontier(Info->Preds[p], Info->IDom)) {
-          // Need a PHI here.
-          NewDefBB = Info;
-          break;
-        }
-      }
-
-      // Check if anything changed.
-      if (NewDefBB != Info->DefBB) {
-        Info->DefBB = NewDefBB;
-        Changed = true;
-      }
-    }
-  } while (Changed);
-}
-
-/// FindAvailableVal - If this block requires a PHI, first check if an existing
-/// PHI matches the PHI placement and reaching definitions computed earlier,
-/// and if not, create a new PHI.  Visit all the block's predecessors to
-/// calculate the available value for each one and fill in the incoming values
-/// for a new PHI.
-void SSAUpdater::FindAvailableVals(BlockListTy *BlockList) {
-  AvailableValsTy &AvailableVals = getAvailableVals(AV);
 
-  // Go through the worklist in forward order (i.e., backward through the CFG)
-  // and check if existing PHIs can be used.  If not, create empty PHIs where
-  // they are needed.
-  for (BlockListTy::iterator I = BlockList->begin(), E = BlockList->end();
-       I != E; ++I) {
-    BBInfo *Info = *I;
-    // Check if there needs to be a PHI in BB.
-    if (Info->DefBB != Info)
-      continue;
-
-    // Look for an existing PHI.
-    FindExistingPHI(Info->BB, BlockList);
-    if (Info->AvailableVal)
-      continue;
-
-    PHINode *PHI = PHINode::Create(PrototypeValue->getType(),
-                                   PrototypeValue->getName(),
-                                   &Info->BB->front());
-    PHI->reserveOperandSpace(Info->NumPreds);
-    Info->AvailableVal = PHI;
-    AvailableVals[Info->BB] = PHI;
+  /// ValueIsPHI - Check if a value is a PHI.
+  ///
+  static PHINode *ValueIsPHI(Value *Val, SSAUpdater *Updater) {
+    return dyn_cast<PHINode>(Val);
   }
 
-  // Now go back through the worklist in reverse order to fill in the arguments
-  // for any new PHIs added in the forward traversal.
-  for (BlockListTy::reverse_iterator I = BlockList->rbegin(),
-         E = BlockList->rend(); I != E; ++I) {
-    BBInfo *Info = *I;
-
-    if (Info->DefBB != Info) {
-      // Record the available value at join nodes to speed up subsequent
-      // uses of this SSAUpdater for the same value.
-      if (Info->NumPreds > 1)
-        AvailableVals[Info->BB] = Info->DefBB->AvailableVal;
-      continue;
-    }
-
-    // Check if this block contains a newly added PHI.
-    PHINode *PHI = dyn_cast<PHINode>(Info->AvailableVal);
-    if (!PHI || PHI->getNumIncomingValues() == Info->NumPreds)
-      continue;
-
-    // Iterate through the block's predecessors.
-    for (unsigned p = 0; p != Info->NumPreds; ++p) {
-      BBInfo *PredInfo = Info->Preds[p];
-      BasicBlock *Pred = PredInfo->BB;
-      // Skip to the nearest preceding definition.
-      if (PredInfo->DefBB != PredInfo)
-        PredInfo = PredInfo->DefBB;
-      PHI->addIncoming(PredInfo->AvailableVal, Pred);
-    }
-
-    DEBUG(dbgs() << "  Inserted PHI: " << *PHI << "\n");
-
-    // If the client wants to know about all new instructions, tell it.
-    if (InsertedPHIs) InsertedPHIs->push_back(PHI);
+  /// ValueIsNewPHI - Like ValueIsPHI but also check if the PHI has no source
+  /// operands, i.e., it was just added.
+  static PHINode *ValueIsNewPHI(Value *Val, SSAUpdater *Updater) {
+    PHINode *PHI = ValueIsPHI(Val, Updater);
+    if (PHI && PHI->getNumIncomingValues() == 0)
+      return PHI;
+    return 0;
   }
-}
 
-/// FindExistingPHI - Look through the PHI nodes in a block to see if any of
-/// them match what is needed.
-void SSAUpdater::FindExistingPHI(BasicBlock *BB, BlockListTy *BlockList) {
-  PHINode *SomePHI;
-  for (BasicBlock::iterator It = BB->begin();
-       (SomePHI = dyn_cast<PHINode>(It)); ++It) {
-    if (CheckIfPHIMatches(SomePHI)) {
-      RecordMatchingPHI(SomePHI);
-      break;
-    }
-    // Match failed: clear all the PHITag values.
-    for (BlockListTy::iterator I = BlockList->begin(), E = BlockList->end();
-         I != E; ++I)
-      (*I)->PHITag = 0;
+  /// GetPHIValue - For the specified PHI instruction, return the value
+  /// that it defines.
+  static Value *GetPHIValue(PHINode *PHI) {
+    return PHI;
   }
-}
+};
 
-/// CheckIfPHIMatches - Check if a PHI node matches the placement and values
-/// in the BBMap.
-bool SSAUpdater::CheckIfPHIMatches(PHINode *PHI) {
-  BBMapTy *BBMap = getBBMap(BM);
-  SmallVector<PHINode*, 20> WorkList;
-  WorkList.push_back(PHI);
-
-  // Mark that the block containing this PHI has been visited.
-  (*BBMap)[PHI->getParent()]->PHITag = PHI;
-
-  while (!WorkList.empty()) {
-    PHI = WorkList.pop_back_val();
-
-    // Iterate through the PHI's incoming values.
-    for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
-      Value *IncomingVal = PHI->getIncomingValue(i);
-      BBInfo *PredInfo = (*BBMap)[PHI->getIncomingBlock(i)];
-      // Skip to the nearest preceding definition.
-      if (PredInfo->DefBB != PredInfo)
-        PredInfo = PredInfo->DefBB;
-
-      // Check if it matches the expected value.
-      if (PredInfo->AvailableVal) {
-        if (IncomingVal == PredInfo->AvailableVal)
-          continue;
-        return false;
-      }
-
-      // Check if the value is a PHI in the correct block.
-      PHINode *IncomingPHIVal = dyn_cast<PHINode>(IncomingVal);
-      if (!IncomingPHIVal || IncomingPHIVal->getParent() != PredInfo->BB)
-        return false;
-
-      // If this block has already been visited, check if this PHI matches.
-      if (PredInfo->PHITag) {
-        if (IncomingPHIVal == PredInfo->PHITag)
-          continue;
-        return false;
-      }
-      PredInfo->PHITag = IncomingPHIVal;
-
-      WorkList.push_back(IncomingPHIVal);
-    }
-  }
-  return true;
-}
+} // End llvm namespace
 
-/// RecordMatchingPHI - For a PHI node that matches, record it and its input
-/// PHIs in both the BBMap and the AvailableVals mapping.
-void SSAUpdater::RecordMatchingPHI(PHINode *PHI) {
-  BBMapTy *BBMap = getBBMap(BM);
+/// GetValueAtEndOfBlockInternal - Check to see if AvailableVals has an entry
+/// for the specified BB and if so, return it.  If not, construct SSA form by
+/// first calculating the required placement of PHIs and then inserting new
+/// PHIs where needed.
+Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) {
   AvailableValsTy &AvailableVals = getAvailableVals(AV);
-  SmallVector<PHINode*, 20> WorkList;
-  WorkList.push_back(PHI);
-
-  // Record this PHI.
-  BasicBlock *BB = PHI->getParent();
-  AvailableVals[BB] = PHI;
-  (*BBMap)[BB]->AvailableVal = PHI;
-
-  while (!WorkList.empty()) {
-    PHI = WorkList.pop_back_val();
-
-    // Iterate through the PHI's incoming values.
-    for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
-      PHINode *IncomingPHIVal = dyn_cast<PHINode>(PHI->getIncomingValue(i));
-      if (!IncomingPHIVal) continue;
-      BB = IncomingPHIVal->getParent();
-      BBInfo *Info = (*BBMap)[BB];
-      if (!Info || Info->AvailableVal)
-        continue;
-
-      // Record the PHI and add it to the worklist.
-      AvailableVals[BB] = IncomingPHIVal;
-      Info->AvailableVal = IncomingPHIVal;
-      WorkList.push_back(IncomingPHIVal);
-    }
-  }
+  if (Value *V = AvailableVals[BB])
+    return V;
+
+  SSAUpdaterImpl<SSAUpdater> Impl(this, &AvailableVals, InsertedPHIs);
+  return Impl.GetValue(BB);
 }
diff --git a/lib/VMCore/AsmWriter.cpp b/lib/VMCore/AsmWriter.cpp
index 6c1aa5e..e48c026 100644
--- a/lib/VMCore/AsmWriter.cpp
+++ b/lib/VMCore/AsmWriter.cpp
@@ -677,11 +677,16 @@ void SlotTracker::processFunction() {
       if (!I->getType()->isVoidTy() && !I->hasName())
         CreateFunctionSlot(I);
       
-      // Intrinsics can directly use metadata.
-      if (isa<IntrinsicInst>(I))
-        for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
-          if (MDNode *N = dyn_cast_or_null<MDNode>(I->getOperand(i)))
-            CreateMetadataSlot(N);
+      // Intrinsics can directly use metadata.  We allow direct calls to any
+      // llvm.foo function here, because the target may not be linked into the
+      // optimizer.
+      if (const CallInst *CI = dyn_cast<CallInst>(I)) {
+        if (Function *F = CI->getCalledFunction())
+          if (F->getName().startswith("llvm."))
+            for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+              if (MDNode *N = dyn_cast_or_null<MDNode>(I->getOperand(i)))
+                CreateMetadataSlot(N);
+      }
 
       // Process metadata attached with this instruction.
       I->getAllMetadata(MDForInst);
@@ -1568,6 +1573,7 @@ void AssemblyWriter::printFunction(const Function *F) {
   case CallingConv::Cold:         Out << "coldcc "; break;
   case CallingConv::X86_StdCall:  Out << "x86_stdcallcc "; break;
   case CallingConv::X86_FastCall: Out << "x86_fastcallcc "; break;
+  case CallingConv::X86_ThisCall: Out << "x86_thiscallcc "; break;
   case CallingConv::ARM_APCS:     Out << "arm_apcscc "; break;
   case CallingConv::ARM_AAPCS:    Out << "arm_aapcscc "; break;
   case CallingConv::ARM_AAPCS_VFP:Out << "arm_aapcs_vfpcc "; break;
@@ -1840,6 +1846,7 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
     case CallingConv::Cold:  Out << " coldcc"; break;
     case CallingConv::X86_StdCall:  Out << " x86_stdcallcc"; break;
     case CallingConv::X86_FastCall: Out << " x86_fastcallcc"; break;
+    case CallingConv::X86_ThisCall: Out << " x86_thiscallcc"; break;
     case CallingConv::ARM_APCS:     Out << " arm_apcscc "; break;
     case CallingConv::ARM_AAPCS:    Out << " arm_aapcscc "; break;
     case CallingConv::ARM_AAPCS_VFP:Out << " arm_aapcs_vfpcc "; break;
@@ -1892,6 +1899,7 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
     case CallingConv::Cold:  Out << " coldcc"; break;
     case CallingConv::X86_StdCall:  Out << " x86_stdcallcc"; break;
     case CallingConv::X86_FastCall: Out << " x86_fastcallcc"; break;
+    case CallingConv::X86_ThisCall: Out << " x86_thiscallcc"; break;
     case CallingConv::ARM_APCS:     Out << " arm_apcscc "; break;
     case CallingConv::ARM_AAPCS:    Out << " arm_aapcscc "; break;
     case CallingConv::ARM_AAPCS_VFP:Out << " arm_aapcs_vfpcc "; break;
@@ -2024,9 +2032,9 @@ static void WriteMDNodeComment(const MDNode *Node,
     return;
   ConstantInt *CI = dyn_cast_or_null<ConstantInt>(Node->getOperand(0));
   if (!CI) return;
-  unsigned Val = CI->getZExtValue();
-  unsigned Tag = Val & ~LLVMDebugVersionMask;
-  if (Val < LLVMDebugVersion)
+  APInt Val = CI->getValue();
+  APInt Tag = Val & ~APInt(Val.getBitWidth(), LLVMDebugVersionMask);
+  if (Val.ult(LLVMDebugVersion))
     return;
   
   Out.PadToColumn(50);
@@ -2040,8 +2048,10 @@ static void WriteMDNodeComment(const MDNode *Node,
     Out << "; [ DW_TAG_vector_type ]";
   else if (Tag == dwarf::DW_TAG_user_base)
     Out << "; [ DW_TAG_user_base ]";
-  else if (const char *TagName = dwarf::TagString(Tag))
-    Out << "; [ " << TagName << " ]";
+  else if (Tag.isIntN(32)) {
+    if (const char *TagName = dwarf::TagString(Tag.getZExtValue()))
+      Out << "; [ " << TagName << " ]";
+  }
 }
 
 void AssemblyWriter::writeAllMDNodes() {
diff --git a/lib/VMCore/PassManager.cpp b/lib/VMCore/PassManager.cpp
index b28fdeb..a56938c 100644
--- a/lib/VMCore/PassManager.cpp
+++ b/lib/VMCore/PassManager.cpp
@@ -275,7 +275,7 @@ public:
       addImmutablePass(IP);
       recordAvailableAnalysis(IP);
     } else {
-      P->assignPassManager(activeStack);
+      P->assignPassManager(activeStack, PMT_FunctionPassManager);
     }
 
   }
@@ -418,7 +418,7 @@ public:
       addImmutablePass(IP);
       recordAvailableAnalysis(IP);
     } else {
-      P->assignPassManager(activeStack);
+      P->assignPassManager(activeStack, PMT_ModulePassManager);
     }
   }
 
@@ -1270,20 +1270,30 @@ FunctionPassManager::~FunctionPassManager() {
   delete FPM;
 }
 
+/// addImpl - Add a pass to the queue of passes to run, without
+/// checking whether to add a printer pass.
+void FunctionPassManager::addImpl(Pass *P) {
+  FPM->add(P);
+}
+
 /// add - Add a pass to the queue of passes to run.  This passes
 /// ownership of the Pass to the PassManager.  When the
 /// PassManager_X is destroyed, the pass will be destroyed as well, so
 /// there is no need to delete the pass. (TODO delete passes.)
 /// This implies that all passes MUST be allocated with 'new'.
 void FunctionPassManager::add(Pass *P) { 
-  if (ShouldPrintBeforePass(P))
-    add(P->createPrinterPass(dbgs(), std::string("*** IR Dump Before ")
-                             + P->getPassName() + " ***"));
-  FPM->add(P);
+  // If this is a not a function pass, don't add a printer for it.
+  if (P->getPassKind() == PT_Function)
+    if (ShouldPrintBeforePass(P))
+      addImpl(P->createPrinterPass(dbgs(), std::string("*** IR Dump Before ")
+                                   + P->getPassName() + " ***"));
 
-  if (ShouldPrintAfterPass(P))
-    add(P->createPrinterPass(dbgs(), std::string("*** IR Dump After ")
-                             + P->getPassName() + " ***"));
+  addImpl(P);
+
+  if (P->getPassKind() == PT_Function)
+    if (ShouldPrintAfterPass(P))
+      addImpl(P->createPrinterPass(dbgs(), std::string("*** IR Dump After ")
+                                   + P->getPassName() + " ***"));
 }
 
 /// run - Execute all of the passes scheduled for execution.  Keep
@@ -1588,20 +1598,26 @@ PassManager::~PassManager() {
   delete PM;
 }
 
+/// addImpl - Add a pass to the queue of passes to run, without
+/// checking whether to add a printer pass.
+void PassManager::addImpl(Pass *P) {
+  PM->add(P);
+}
+
 /// add - Add a pass to the queue of passes to run.  This passes ownership of
 /// the Pass to the PassManager.  When the PassManager is destroyed, the pass
 /// will be destroyed as well, so there is no need to delete the pass.  This
 /// implies that all passes MUST be allocated with 'new'.
 void PassManager::add(Pass *P) {
   if (ShouldPrintBeforePass(P))
-    add(P->createPrinterPass(dbgs(), std::string("*** IR Dump Before ")
-                             + P->getPassName() + " ***"));
+    addImpl(P->createPrinterPass(dbgs(), std::string("*** IR Dump Before ")
+                                 + P->getPassName() + " ***"));
 
-  PM->add(P);
+  addImpl(P);
 
   if (ShouldPrintAfterPass(P))
-    add(P->createPrinterPass(dbgs(), std::string("*** IR Dump After ")
-                             + P->getPassName() + " ***"));
+    addImpl(P->createPrinterPass(dbgs(), std::string("*** IR Dump After ")
+                                 + P->getPassName() + " ***"));
 }
 
 /// run - Execute all of the passes scheduled for execution.  Keep track of
@@ -1764,7 +1780,7 @@ void BasicBlockPass::assignPassManager(PMStack &PMS,
 
     // [3] Assign manager to manage this new manager. This may create
     // and push new managers into PMS
-    BBP->assignPassManager(PMS);
+    BBP->assignPassManager(PMS, PreferredType);
 
     // [4] Push new manager into PMS
     PMS.push(BBP);
diff --git a/lib/VMCore/ValueTypes.cpp b/lib/VMCore/ValueTypes.cpp
index a092cd1..d2a8ce3 100644
--- a/lib/VMCore/ValueTypes.cpp
+++ b/lib/VMCore/ValueTypes.cpp
@@ -61,6 +61,10 @@ bool EVT::isExtended256BitVector() const {
   return isExtendedVector() && getSizeInBits() == 256;
 }
 
+bool EVT::isExtended512BitVector() const {
+  return isExtendedVector() && getSizeInBits() == 512;
+}
+
 EVT EVT::getExtendedVectorElementType() const {
   assert(isExtended() && "Type is not extended!");
   return EVT::getEVT(cast<VectorType>(LLVMTy)->getElementType());
@@ -121,6 +125,7 @@ std::string EVT::getEVTString() const {
   case MVT::v1i64:   return "v1i64";
   case MVT::v2i64:   return "v2i64";
   case MVT::v4i64:   return "v4i64";
+  case MVT::v8i64:   return "v8i64";
   case MVT::v2f32:   return "v2f32";
   case MVT::v4f32:   return "v4f32";
   case MVT::v8f32:   return "v8f32";
@@ -165,6 +170,7 @@ const Type *EVT::getTypeForEVT(LLVMContext &Context) const {
   case MVT::v1i64:   return VectorType::get(Type::getInt64Ty(Context), 1);
   case MVT::v2i64:   return VectorType::get(Type::getInt64Ty(Context), 2);
   case MVT::v4i64:   return VectorType::get(Type::getInt64Ty(Context), 4);
+  case MVT::v8i64:   return VectorType::get(Type::getInt64Ty(Context), 8);
   case MVT::v2f32:   return VectorType::get(Type::getFloatTy(Context), 2);
   case MVT::v4f32:   return VectorType::get(Type::getFloatTy(Context), 4);
   case MVT::v8f32:   return VectorType::get(Type::getFloatTy(Context), 8);
diff --git a/lib/VMCore/Verifier.cpp b/lib/VMCore/Verifier.cpp
index 6ad4272..75988cc 100644
--- a/lib/VMCore/Verifier.cpp
+++ b/lib/VMCore/Verifier.cpp
@@ -688,6 +688,7 @@ void Verifier::visitFunction(Function &F) {
   case CallingConv::Fast:
   case CallingConv::Cold:
   case CallingConv::X86_FastCall:
+  case CallingConv::X86_ThisCall:
     Assert1(!F.isVarArg(),
             "Varargs functions must have C calling conventions!", &F);
     break;
@@ -1152,7 +1153,7 @@ void Verifier::VerifyCallSite(CallSite CS) {
     Assert1(CS.arg_size() == FTy->getNumParams(),
             "Incorrect number of arguments passed to called function!", I);
 
-  // Verify that all arguments to the call match the function type...
+  // Verify that all arguments to the call match the function type.
   for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i)
     Assert3(CS.getArgument(i)->getType() == FTy->getParamType(i),
             "Call parameter type does not match function signature!",
@@ -1179,8 +1180,8 @@ void Verifier::VerifyCallSite(CallSite CS) {
     }
 
   // Verify that there's no metadata unless it's a direct call to an intrinsic.
-  if (!CS.getCalledFunction() || CS.getCalledFunction()->getName().size() < 5 ||
-      CS.getCalledFunction()->getName().substr(0, 5) != "llvm.") {
+  if (!CS.getCalledFunction() ||
+      !CS.getCalledFunction()->getName().startswith("llvm.")) {
     for (FunctionType::param_iterator PI = FTy->param_begin(),
            PE = FTy->param_end(); PI != PE; ++PI)
       Assert1(!PI->get()->isMetadataTy(),
diff --git a/test/CodeGen/ARM/2008-02-04-LocalRegAllocBug.ll b/test/CodeGen/ARM/2008-02-04-LocalRegAllocBug.ll
index ff01506..f775c61 100644
--- a/test/CodeGen/ARM/2008-02-04-LocalRegAllocBug.ll
+++ b/test/CodeGen/ARM/2008-02-04-LocalRegAllocBug.ll
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -mtriple=arm-linux-gnueabi -regalloc=local
+; RUN: llc < %s -mtriple=arm-linux-gnueabi -regalloc=fast
 ; PR1925
 
 	%struct.encode_aux_nearestmatch = type { i32*, i32*, i32*, i32*, i32, i32 }
diff --git a/test/CodeGen/ARM/2008-02-29-RegAllocLocal.ll b/test/CodeGen/ARM/2008-02-29-RegAllocLocal.ll
index 06bc987..8ef8c7b 100644
--- a/test/CodeGen/ARM/2008-02-29-RegAllocLocal.ll
+++ b/test/CodeGen/ARM/2008-02-29-RegAllocLocal.ll
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -mtriple=arm-apple-darwin -regalloc=local
+; RUN: llc < %s -mtriple=arm-apple-darwin -regalloc=fast
 ; PR1925
 
 	%"struct.kc::impl_Ccode_option" = type { %"struct.kc::impl_abstract_phylum" }
diff --git a/test/CodeGen/ARM/2009-05-05-DAGCombineBug.ll b/test/CodeGen/ARM/2009-05-05-DAGCombineBug.ll
index 670d204..a48e41f 100644
--- a/test/CodeGen/ARM/2009-05-05-DAGCombineBug.ll
+++ b/test/CodeGen/ARM/2009-05-05-DAGCombineBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=arm-linuxeabi-unknown-gnu -mattr=+v6
+; RUN: llc < %s -mtriple=arm-unknown-linux-gnueabi -mattr=+v6
 ; PR4166
 
 	%"byte[]" = type { i32, i8* }
diff --git a/test/CodeGen/ARM/2009-05-07-RegAllocLocal.ll b/test/CodeGen/ARM/2009-05-07-RegAllocLocal.ll
index 75610ff..912e6f9 100644
--- a/test/CodeGen/ARM/2009-05-07-RegAllocLocal.ll
+++ b/test/CodeGen/ARM/2009-05-07-RegAllocLocal.ll
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -mtriple=armv5-unknown-linux-gnueabi -O0 -regalloc=local
+; RUN: llc < %s -mtriple=armv5-unknown-linux-gnueabi -O0 -regalloc=fast
 ; PR4100
 @.str = external constant [30 x i8]		; <[30 x i8]*> [#uses=1]
 
diff --git a/test/CodeGen/ARM/2009-11-02-NegativeLane.ll b/test/CodeGen/ARM/2009-11-02-NegativeLane.ll
index f2288c3..89c9037 100644
--- a/test/CodeGen/ARM/2009-11-02-NegativeLane.ll
+++ b/test/CodeGen/ARM/2009-11-02-NegativeLane.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mcpu=cortex-a8 < %s | grep vdup.32
+; RUN: llc -mcpu=cortex-a8 < %s | grep vdup.16
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
 target triple = "armv7-eabi"
 
@@ -7,12 +7,12 @@ entry:
   br i1 undef, label %return, label %bb
 
 bb:                                               ; preds = %bb, %entry
-  %0 = load float* undef, align 4                 ; <float> [#uses=1]
-  %1 = insertelement <4 x float> undef, float %0, i32 2 ; <<4 x float>> [#uses=1]
-  %2 = insertelement <4 x float> %1, float undef, i32 3 ; <<4 x float>> [#uses=1]
-  %3 = fmul <4 x float> undef, %2                 ; <<4 x float>> [#uses=1]
-  %4 = extractelement <4 x float> %3, i32 1       ; <float> [#uses=1]
-  store float %4, float* undef, align 4
+  %0 = load i16* undef, align 2
+  %1 = insertelement <8 x i16> undef, i16 %0, i32 2
+  %2 = insertelement <8 x i16> %1, i16 undef, i32 3
+  %3 = mul <8 x i16> %2, %2
+  %4 = extractelement <8 x i16> %3, i32 2
+  store i16 %4, i16* undef, align 2
   br i1 undef, label %return, label %bb
 
 return:                                           ; preds = %bb, %entry
diff --git a/test/CodeGen/ARM/2010-05-14-IllegalType.ll b/test/CodeGen/ARM/2010-05-14-IllegalType.ll
new file mode 100644
index 0000000..99e5b09
--- /dev/null
+++ b/test/CodeGen/ARM/2010-05-14-IllegalType.ll
@@ -0,0 +1,10 @@
+; RUN: llc -march=thumb -mcpu=cortex-a8 -mtriple=thumbv7-eabi -float-abi=hard < %s | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32"
+target triple = "thumbv7-apple-darwin10"
+
+define <4 x i64> @f_4_i64(<4 x i64> %a, <4 x i64> %b) nounwind {
+; CHECK: vadd.i64
+ %y = add <4 x i64> %a, %b
+ ret <4 x i64> %y
+}
diff --git a/test/CodeGen/ARM/2010-05-17-DAGCombineAssert.ll b/test/CodeGen/ARM/2010-05-17-DAGCombineAssert.ll
new file mode 100644
index 0000000..2a4bbd1
--- /dev/null
+++ b/test/CodeGen/ARM/2010-05-17-DAGCombineAssert.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -mtriple=armv7-eabi -mcpu=cortex-a8
+; PR7158
+
+define arm_aapcs_vfpcc i32 @main() nounwind {
+bb.nph55.bb.nph55.split_crit_edge:
+  br label %bb3
+
+bb3:                                              ; preds = %bb3, %bb.nph55.bb.nph55.split_crit_edge
+  br i1 undef, label %bb.i19, label %bb3
+
+bb.i19:                                           ; preds = %bb.i19, %bb3
+  %0 = insertelement <4 x float> undef, float undef, i32 3 ; <<4 x float>> [#uses=3]
+  %1 = fmul <4 x float> %0, %0                    ; <<4 x float>> [#uses=1]
+  %2 = bitcast <4 x float> %1 to <2 x double>     ; <<2 x double>> [#uses=0]
+  %3 = fmul <4 x float> %0, undef                 ; <<4 x float>> [#uses=0]
+  br label %bb.i19
+}
diff --git a/test/CodeGen/ARM/2010-05-17-FastAllocCrash.ll b/test/CodeGen/ARM/2010-05-17-FastAllocCrash.ll
new file mode 100644
index 0000000..813bf3c
--- /dev/null
+++ b/test/CodeGen/ARM/2010-05-17-FastAllocCrash.ll
@@ -0,0 +1,105 @@
+; RUN: llc < %s -regalloc=fast -verify-machineinstrs
+target triple = "arm-pc-linux-gnu"
+
+; This test case would accidentally use the same physreg for two virtregs
+; because allocVirtReg forgot to check if registers were already used in the
+; instruction.
+; This caused the RegScavenger to complain, but -verify-machineinstrs also
+; catches it.
+
+%struct.CHESS_POSITION = type { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i32, i32, i8, i8, [64 x i8], i8, i8, i8, i8, i8 }
+
+@search = external global %struct.CHESS_POSITION  ; <%struct.CHESS_POSITION*> [#uses=1]
+@bishop_mobility_rr45 = external global [64 x [256 x i32]] ; <[64 x [256 x i32]]*> [#uses=1]
+
+declare fastcc i32 @FirstOne()
+
+define fastcc void @Evaluate() {
+entry:
+  br i1 false, label %cond_false186, label %cond_true
+
+cond_true:                                        ; preds = %entry
+  ret void
+
+cond_false186:                                    ; preds = %entry
+  br i1 false, label %cond_true293, label %bb203
+
+bb203:                                            ; preds = %cond_false186
+  ret void
+
+cond_true293:                                     ; preds = %cond_false186
+  br i1 false, label %cond_true298, label %cond_next317
+
+cond_true298:                                     ; preds = %cond_true293
+  br i1 false, label %cond_next518, label %cond_true397.preheader
+
+cond_next317:                                     ; preds = %cond_true293
+  ret void
+
+cond_true397.preheader:                           ; preds = %cond_true298
+  ret void
+
+cond_next518:                                     ; preds = %cond_true298
+  br i1 false, label %bb1069, label %cond_true522
+
+cond_true522:                                     ; preds = %cond_next518
+  ret void
+
+bb1069:                                           ; preds = %cond_next518
+  br i1 false, label %cond_next1131, label %bb1096
+
+bb1096:                                           ; preds = %bb1069
+  ret void
+
+cond_next1131:                                    ; preds = %bb1069
+  br i1 false, label %cond_next1207, label %cond_true1150
+
+cond_true1150:                                    ; preds = %cond_next1131
+  ret void
+
+cond_next1207:                                    ; preds = %cond_next1131
+  br i1 false, label %cond_next1219, label %cond_true1211
+
+cond_true1211:                                    ; preds = %cond_next1207
+  ret void
+
+cond_next1219:                                    ; preds = %cond_next1207
+  br i1 false, label %cond_true1223, label %cond_next1283
+
+cond_true1223:                                    ; preds = %cond_next1219
+  br i1 false, label %cond_true1254, label %cond_true1264
+
+cond_true1254:                                    ; preds = %cond_true1223
+  br i1 false, label %bb1567, label %cond_true1369.preheader
+
+cond_true1264:                                    ; preds = %cond_true1223
+  ret void
+
+cond_next1283:                                    ; preds = %cond_next1219
+  ret void
+
+cond_true1369.preheader:                          ; preds = %cond_true1254
+  ret void
+
+bb1567:                                           ; preds = %cond_true1254
+  %tmp1591 = load i64* getelementptr inbounds (%struct.CHESS_POSITION* @search, i32 0, i32 4) ; <i64> [#uses=1]
+  %tmp1572 = tail call fastcc i32 @FirstOne()     ; <i32> [#uses=1]
+  %tmp1594 = load i32* undef                      ; <i32> [#uses=1]
+  %tmp1594.upgrd.5 = trunc i32 %tmp1594 to i8     ; <i8> [#uses=1]
+  %shift.upgrd.6 = zext i8 %tmp1594.upgrd.5 to i64 ; <i64> [#uses=1]
+  %tmp1595 = lshr i64 %tmp1591, %shift.upgrd.6    ; <i64> [#uses=1]
+  %tmp1595.upgrd.7 = trunc i64 %tmp1595 to i32    ; <i32> [#uses=1]
+  %tmp1596 = and i32 %tmp1595.upgrd.7, 255        ; <i32> [#uses=1]
+  %gep.upgrd.8 = zext i32 %tmp1596 to i64         ; <i64> [#uses=1]
+  %tmp1598 = getelementptr [64 x [256 x i32]]* @bishop_mobility_rr45, i32 0, i32 %tmp1572, i64 %gep.upgrd.8 ; <i32*> [#uses=1]
+  %tmp1599 = load i32* %tmp1598                   ; <i32> [#uses=1]
+  %tmp1602 = sub i32 0, %tmp1599                  ; <i32> [#uses=1]
+  br i1 undef, label %cond_next1637, label %cond_true1607
+
+cond_true1607:                                    ; preds = %bb1567
+  ret void
+
+cond_next1637:                                    ; preds = %bb1567
+  %tmp1662 = sub i32 %tmp1602, 0                  ; <i32> [#uses=0]
+  ret void
+}
diff --git a/test/CodeGen/ARM/2010-05-18-LocalAllocCrash.ll b/test/CodeGen/ARM/2010-05-18-LocalAllocCrash.ll
new file mode 100644
index 0000000..b158afd
--- /dev/null
+++ b/test/CodeGen/ARM/2010-05-18-LocalAllocCrash.ll
@@ -0,0 +1,37 @@
+; RUN: llc < %s -O0 -verify-machineinstrs -regalloc=local
+; RUN: llc < %s -O0 -verify-machineinstrs -regalloc=fast
+; rdar://problem/7948106
+;; This test would spill %R4 before the call to zz, but it forgot to move the
+; 'last use' marker to the spill.
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64-n32"
+target triple = "armv6-apple-darwin"
+
+%struct.q = type { i32, i32 }
+
+@.str = external constant [1 x i8]                ; <[1 x i8]*> [#uses=1]
+
+define arm_apcscc void @yy(%struct.q* %qq) nounwind {
+entry:
+  %vla6 = alloca i8, i32 undef, align 1           ; <i8*> [#uses=1]
+  %vla10 = alloca i8, i32 undef, align 1          ; <i8*> [#uses=1]
+  %vla14 = alloca i8, i32 undef, align 1          ; <i8*> [#uses=1]
+  %vla18 = alloca i8, i32 undef, align 1          ; <i8*> [#uses=1]
+  %tmp21 = load i32* undef                        ; <i32> [#uses=1]
+  %0 = mul i32 1, %tmp21                          ; <i32> [#uses=1]
+  %vla22 = alloca i8, i32 %0, align 1             ; <i8*> [#uses=1]
+  call arm_apcscc  void (...)* @zz(i8* getelementptr inbounds ([1 x i8]* @.str, i32 0, i32 0), i32 2, i32 1)
+  br i1 undef, label %if.then, label %if.end36
+
+if.then:                                          ; preds = %entry
+  %call = call arm_apcscc  i32 (...)* @x(%struct.q* undef, i8* undef, i8* %vla6, i8* %vla10, i32 undef) ; <i32> [#uses=0]
+  %call35 = call arm_apcscc  i32 (...)* @x(%struct.q* undef, i8* %vla14, i8* %vla18, i8* %vla22, i32 undef) ; <i32> [#uses=0]
+  unreachable
+
+if.end36:                                         ; preds = %entry
+  ret void
+}
+
+declare arm_apcscc void @zz(...)
+
+declare arm_apcscc i32 @x(...)
diff --git a/test/CodeGen/ARM/2010-05-18-PostIndexBug.ll b/test/CodeGen/ARM/2010-05-18-PostIndexBug.ll
new file mode 100644
index 0000000..9907228
--- /dev/null
+++ b/test/CodeGen/ARM/2010-05-18-PostIndexBug.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -mtriple=armv7-apple-darwin   | FileCheck %s -check-prefix=ARM
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s -check-prefix=THUMB
+; rdar://7998649
+
+%struct.foo = type { i64, i64 }
+
+define arm_apcscc zeroext i8 @t(%struct.foo* %this) noreturn optsize {
+entry:
+; ARM:       t:
+; ARM:       str r0, [r1], r0
+
+; THUMB:     t:
+; THUMB-NOT: str r0, [r1], r0
+; THUMB:     str r0, [r1]
+  %0 = getelementptr inbounds %struct.foo* %this, i32 0, i32 1 ; <i64*> [#uses=1]
+  store i32 undef, i32* inttoptr (i32 8 to i32*), align 8
+  br i1 undef, label %bb.nph96, label %bb3
+
+bb3:                                              ; preds = %entry
+  %1 = load i64* %0, align 4                      ; <i64> [#uses=0]
+  unreachable
+
+bb.nph96:                                         ; preds = %entry
+  unreachable
+}
diff --git a/test/CodeGen/ARM/2010-05-19-Shuffles.ll b/test/CodeGen/ARM/2010-05-19-Shuffles.ll
new file mode 100644
index 0000000..587c0af
--- /dev/null
+++ b/test/CodeGen/ARM/2010-05-19-Shuffles.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -mtriple=armv7-eabi -mcpu=cortex-a8
+; pr7167
+
+define <8 x i8> @f1(<8 x i8> %x) nounwind {
+  %y = shufflevector <8 x i8> %x, <8 x i8> undef,
+       <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 5>
+  ret <8 x i8> %y
+}
+
+define <8 x i8> @f2(<8 x i8> %x) nounwind {
+  %y = shufflevector <8 x i8> %x, <8 x i8> undef,
+       <8 x i32> <i32 1, i32 2, i32 0, i32 5, i32 3, i32 6, i32 7, i32 4>
+  ret <8 x i8> %y
+}
+
+define void @f3(<4 x i64>* %xp) nounwind {
+  %x = load <4 x i64>* %xp
+  %y = shufflevector <4 x i64> %x, <4 x i64> undef, <4 x i32> <i32 0, i32 3, i32 2, i32 1>
+  store <4 x i64> %y, <4 x i64>* %xp
+  ret void
+}
diff --git a/test/CodeGen/ARM/2010-05-20-NEONSpillCrash.ll b/test/CodeGen/ARM/2010-05-20-NEONSpillCrash.ll
new file mode 100644
index 0000000..b6fbf9b
--- /dev/null
+++ b/test/CodeGen/ARM/2010-05-20-NEONSpillCrash.ll
@@ -0,0 +1,45 @@
+; RUN: llc < %s -march=arm -mattr=+neon -O0
+
+; This test would crash the rewriter when trying to handle a spill after one of
+; the @llvm.arm.neon.vld3.v8i8 defined three parts of a register.
+
+%struct.__neon_int8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> }
+
+declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8*) nounwind readonly
+
+declare void @llvm.arm.neon.vst3.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>) nounwind
+
+define <8 x i8> @t3(i8* %A1, i8* %A2, i8* %A3, i8* %A4, i8* %A5, i8* %A6, i8* %A7, i8* %A8, i8* %B) nounwind {
+  %tmp1b = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A2) ; <%struct.__neon_int8x8x3_t> [#uses=2]
+  %tmp2b = extractvalue %struct.__neon_int8x8x3_t %tmp1b, 0 ; <<8 x i8>> [#uses=1]
+  %tmp4b = extractvalue %struct.__neon_int8x8x3_t %tmp1b, 1 ; <<8 x i8>> [#uses=1]
+  %tmp1d = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A4) ; <%struct.__neon_int8x8x3_t> [#uses=2]
+  %tmp2d = extractvalue %struct.__neon_int8x8x3_t %tmp1d, 0 ; <<8 x i8>> [#uses=1]
+  %tmp4d = extractvalue %struct.__neon_int8x8x3_t %tmp1d, 1 ; <<8 x i8>> [#uses=1]
+  %tmp1e = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A5) ; <%struct.__neon_int8x8x3_t> [#uses=1]
+  %tmp2e = extractvalue %struct.__neon_int8x8x3_t %tmp1e, 0 ; <<8 x i8>> [#uses=1]
+  %tmp1f = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A6) ; <%struct.__neon_int8x8x3_t> [#uses=1]
+  %tmp2f = extractvalue %struct.__neon_int8x8x3_t %tmp1f, 0 ; <<8 x i8>> [#uses=1]
+  %tmp1g = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A7) ; <%struct.__neon_int8x8x3_t> [#uses=2]
+  %tmp2g = extractvalue %struct.__neon_int8x8x3_t %tmp1g, 0 ; <<8 x i8>> [#uses=1]
+  %tmp4g = extractvalue %struct.__neon_int8x8x3_t %tmp1g, 1 ; <<8 x i8>> [#uses=1]
+  %tmp1h = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A8) ; <%struct.__neon_int8x8x3_t> [#uses=2]
+  %tmp2h = extractvalue %struct.__neon_int8x8x3_t %tmp1h, 0 ; <<8 x i8>> [#uses=1]
+  %tmp3h = extractvalue %struct.__neon_int8x8x3_t %tmp1h, 2 ; <<8 x i8>> [#uses=1]
+  %tmp2bd = add <8 x i8> %tmp2b, %tmp2d           ; <<8 x i8>> [#uses=1]
+  %tmp4bd = add <8 x i8> %tmp4b, %tmp4d           ; <<8 x i8>> [#uses=1]
+  %tmp2abcd = mul <8 x i8> undef, %tmp2bd         ; <<8 x i8>> [#uses=1]
+  %tmp4abcd = mul <8 x i8> undef, %tmp4bd         ; <<8 x i8>> [#uses=2]
+  call void @llvm.arm.neon.vst3.v8i8(i8* %A1, <8 x i8> %tmp4abcd, <8 x i8> zeroinitializer, <8 x i8> %tmp2abcd)
+  %tmp2ef = sub <8 x i8> %tmp2e, %tmp2f           ; <<8 x i8>> [#uses=1]
+  %tmp2gh = sub <8 x i8> %tmp2g, %tmp2h           ; <<8 x i8>> [#uses=1]
+  %tmp3gh = sub <8 x i8> zeroinitializer, %tmp3h  ; <<8 x i8>> [#uses=1]
+  %tmp4ef = sub <8 x i8> zeroinitializer, %tmp4g  ; <<8 x i8>> [#uses=1]
+  %tmp2efgh = mul <8 x i8> %tmp2ef, %tmp2gh       ; <<8 x i8>> [#uses=1]
+  %tmp3efgh = mul <8 x i8> undef, %tmp3gh         ; <<8 x i8>> [#uses=1]
+  %tmp4efgh = mul <8 x i8> %tmp4ef, undef         ; <<8 x i8>> [#uses=2]
+  call void @llvm.arm.neon.vst3.v8i8(i8* %A2, <8 x i8> %tmp4efgh, <8 x i8> %tmp3efgh, <8 x i8> %tmp2efgh)
+  %tmp4 = sub <8 x i8> %tmp4efgh, %tmp4abcd       ; <<8 x i8>> [#uses=1]
+  tail call void @llvm.arm.neon.vst3.v8i8(i8* %B, <8 x i8> zeroinitializer, <8 x i8> undef, <8 x i8> undef)
+  ret <8 x i8> %tmp4
+}
diff --git a/test/CodeGen/ARM/2010-05-21-BuildVector.ll b/test/CodeGen/ARM/2010-05-21-BuildVector.ll
new file mode 100644
index 0000000..6b19490
--- /dev/null
+++ b/test/CodeGen/ARM/2010-05-21-BuildVector.ll
@@ -0,0 +1,43 @@
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s
+; Radar 7872877
+
+define arm_apcscc void @test(float* %fltp, i32 %packedValue, float* %table) nounwind {
+entry:
+  %0 = load float* %fltp
+  %1 = insertelement <4 x float> undef, float %0, i32 0
+  %2 = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> zeroinitializer
+  %3 = shl i32 %packedValue, 16
+  %4 = ashr i32 %3, 30
+  %.sum = add i32 %4, 4
+  %5 = getelementptr inbounds float* %table, i32 %.sum
+;CHECK: vldr.32 s
+  %6 = load float* %5, align 4
+  %tmp11 = insertelement <4 x float> undef, float %6, i32 0
+  %7 = shl i32 %packedValue, 18
+  %8 = ashr i32 %7, 30
+  %.sum12 = add i32 %8, 4
+  %9 = getelementptr inbounds float* %table, i32 %.sum12
+;CHECK: vldr.32 s
+  %10 = load float* %9, align 4
+  %tmp9 = insertelement <4 x float> %tmp11, float %10, i32 1
+  %11 = shl i32 %packedValue, 20
+  %12 = ashr i32 %11, 30
+  %.sum13 = add i32 %12, 4
+  %13 = getelementptr inbounds float* %table, i32 %.sum13
+;CHECK: vldr.32 s
+  %14 = load float* %13, align 4
+  %tmp7 = insertelement <4 x float> %tmp9, float %14, i32 2
+  %15 = shl i32 %packedValue, 22
+  %16 = ashr i32 %15, 30
+  %.sum14 = add i32 %16, 4
+  %17 = getelementptr inbounds float* %table, i32 %.sum14
+;CHECK: vldr.32 s
+  %18 = load float* %17, align 4
+  %tmp5 = insertelement <4 x float> %tmp7, float %18, i32 3
+  %19 = fmul <4 x float> %tmp5, %2
+  %20 = bitcast float* %fltp to i8*
+  tail call void @llvm.arm.neon.vst1.v4f32(i8* %20, <4 x float> %19)
+  ret void
+}
+
+declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>) nounwind
diff --git a/test/CodeGen/ARM/arm-frameaddr.ll b/test/CodeGen/ARM/arm-frameaddr.ll
index 2739860..1c7ac25 100644
--- a/test/CodeGen/ARM/arm-frameaddr.ll
+++ b/test/CodeGen/ARM/arm-frameaddr.ll
@@ -1,10 +1,15 @@
-; RUN: llc < %s -mtriple=arm-apple-darwin  | grep mov | grep r7
-; RUN: llc < %s -mtriple=arm-linux-gnueabi | grep mov | grep r11
+; RUN: llc < %s -mtriple=arm-apple-darwin  | FileCheck %s -check-prefix=DARWIN
+; RUN: llc < %s -mtriple=arm-linux-gnueabi | FileCheck %s -check-prefix=LINUX
 ; PR4344
 ; PR4416
 
 define arm_aapcscc i8* @t() nounwind {
 entry:
+; DARWIN: t:
+; DARWIN: mov r0, r7
+
+; LINUX: t:
+; LINUX: mov r0, r11
 	%0 = call i8* @llvm.frameaddress(i32 0)
         ret i8* %0
 }
diff --git a/test/CodeGen/ARM/arm-returnaddr.ll b/test/CodeGen/ARM/arm-returnaddr.ll
new file mode 100644
index 0000000..2c8f2ab
--- /dev/null
+++ b/test/CodeGen/ARM/arm-returnaddr.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv6-apple-darwin
+; rdar://8015977
+; rdar://8020118
+
+define arm_apcscc i8* @rt0(i32 %x) nounwind readnone {
+entry:
+; CHECK: rt0:
+; CHECK: mov r0, lr
+  %0 = tail call i8* @llvm.returnaddress(i32 0)
+  ret i8* %0
+}
+
+define arm_apcscc i8* @rt2() nounwind readnone {
+entry:
+; CHECK: rt2:
+; CHECK: ldr r0, [r7]
+; CHECK: ldr r0, [r0]
+; CHECK: ldr r0, [r0, #4]
+  %0 = tail call i8* @llvm.returnaddress(i32 2)
+  ret i8* %0
+}
+
+declare i8* @llvm.returnaddress(i32) nounwind readnone
diff --git a/test/CodeGen/ARM/div.ll b/test/CodeGen/ARM/div.ll
index 2f724e7..d833afa 100644
--- a/test/CodeGen/ARM/div.ll
+++ b/test/CodeGen/ARM/div.ll
@@ -1,29 +1,43 @@
-; RUN: llc < %s -march=arm > %t
-; RUN: grep __divsi3  %t
-; RUN: grep __udivsi3 %t
-; RUN: grep __modsi3  %t
-; RUN: grep __umodsi3 %t
+; RUN: llc < %s -march=arm | FileCheck %s -check-prefix=CHECK-ARM
+; RUN: llc < %s -march=arm -mcpu=cortex-m3 \
+; RUN:    | FileCheck %s -check-prefix=CHECK-ARMV7M
 
 define i32 @f1(i32 %a, i32 %b) {
 entry:
+; CHECK-ARM: f1
+; CHECK-ARM: __divsi3
+; CHECK-ARMV7M: f1
+; CHECK-ARMV7M: sdiv
         %tmp1 = sdiv i32 %a, %b         ; <i32> [#uses=1]
         ret i32 %tmp1
 }
 
 define i32 @f2(i32 %a, i32 %b) {
 entry:
+; CHECK-ARM: f2
+; CHECK-ARM: __udivsi3
+; CHECK-ARMV7M: f2
+; CHECK-ARMV7M: udiv
         %tmp1 = udiv i32 %a, %b         ; <i32> [#uses=1]
         ret i32 %tmp1
 }
 
 define i32 @f3(i32 %a, i32 %b) {
 entry:
+; CHECK-ARM: f3
+; CHECK-ARM: __modsi3
+; CHECK-ARMV7M: f3
+; CHECK-ARMV7M: sdiv
         %tmp1 = srem i32 %a, %b         ; <i32> [#uses=1]
         ret i32 %tmp1
 }
 
 define i32 @f4(i32 %a, i32 %b) {
 entry:
+; CHECK-ARM: f4
+; CHECK-ARM: __umodsi3
+; CHECK-ARMV7M: f4
+; CHECK-ARMV7M: udiv
         %tmp1 = urem i32 %a, %b         ; <i32> [#uses=1]
         ret i32 %tmp1
 }
diff --git a/test/CodeGen/ARM/fabss.ll b/test/CodeGen/ARM/fabss.ll
index f03282b..dfc1e0a 100644
--- a/test/CodeGen/ARM/fabss.ll
+++ b/test/CodeGen/ARM/fabss.ll
@@ -24,4 +24,4 @@ declare float @fabsf(float)
 ; CORTEXA8: test:
 ; CORTEXA8: 	vabs.f32	d1, d1
 ; CORTEXA9: test:
-; CORTEXA9: 	vabs.f32	s1, s1
+; CORTEXA9: 	vabs.f32	s0, s0
diff --git a/test/CodeGen/ARM/fadds.ll b/test/CodeGen/ARM/fadds.ll
index 749690e..113f0e2 100644
--- a/test/CodeGen/ARM/fadds.ll
+++ b/test/CodeGen/ARM/fadds.ll
@@ -20,4 +20,4 @@ entry:
 ; CORTEXA8: test:
 ; CORTEXA8: 	vadd.f32	d0, d1, d0
 ; CORTEXA9: test:
-; CORTEXA9: 	vadd.f32	s0, s1, s0
+; CORTEXA9: 	vadd.f32	s0, s0, s1
diff --git a/test/CodeGen/ARM/fdivs.ll b/test/CodeGen/ARM/fdivs.ll
index 0c31495..9af1217 100644
--- a/test/CodeGen/ARM/fdivs.ll
+++ b/test/CodeGen/ARM/fdivs.ll
@@ -20,4 +20,4 @@ entry:
 ; CORTEXA8: test:
 ; CORTEXA8: 	vdiv.f32	s0, s1, s0
 ; CORTEXA9: test:
-; CORTEXA9: 	vdiv.f32	s0, s1, s0
+; CORTEXA9: 	vdiv.f32	s0, s0, s1
diff --git a/test/CodeGen/ARM/fmacs.ll b/test/CodeGen/ARM/fmacs.ll
index f8b47b5..c4ceca9 100644
--- a/test/CodeGen/ARM/fmacs.ll
+++ b/test/CodeGen/ARM/fmacs.ll
@@ -21,4 +21,4 @@ entry:
 ; CORTEXA8: test:
 ; CORTEXA8: 	vmul.f32	d0, d1, d0
 ; CORTEXA9: test:
-; CORTEXA9: 	vmla.f32	s2, s1, s0
+; CORTEXA9: 	vmla.f32	s0, s1, s2
diff --git a/test/CodeGen/ARM/fmscs.ll b/test/CodeGen/ARM/fmscs.ll
index 7a70543..103ce33 100644
--- a/test/CodeGen/ARM/fmscs.ll
+++ b/test/CodeGen/ARM/fmscs.ll
@@ -21,4 +21,4 @@ entry:
 ; CORTEXA8: test:
 ; CORTEXA8: 	vnmls.f32	s2, s1, s0
 ; CORTEXA9: test:
-; CORTEXA9: 	vnmls.f32	s2, s1, s0
+; CORTEXA9: 	vnmls.f32	s0, s1, s2
diff --git a/test/CodeGen/ARM/fmuls.ll b/test/CodeGen/ARM/fmuls.ll
index ef4e3e5..bfafd20 100644
--- a/test/CodeGen/ARM/fmuls.ll
+++ b/test/CodeGen/ARM/fmuls.ll
@@ -20,4 +20,4 @@ entry:
 ; CORTEXA8: test:
 ; CORTEXA8: 	vmul.f32	d0, d1, d0
 ; CORTEXA9: test:
-; CORTEXA9: 	vmul.f32	s0, s1, s0
+; CORTEXA9: 	vmul.f32	s0, s0, s1
diff --git a/test/CodeGen/ARM/fnmscs.ll b/test/CodeGen/ARM/fnmscs.ll
index 6b7cefa..0b47edd 100644
--- a/test/CodeGen/ARM/fnmscs.ll
+++ b/test/CodeGen/ARM/fnmscs.ll
@@ -4,7 +4,7 @@
 ; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s
 
 define float @test1(float %acc, float %a, float %b) nounwind {
-; CHECK: vnmla.f32 s2, s1, s0
+; CHECK: vnmla.f32 s{{.*}}, s{{.*}}, s{{.*}}
 entry:
 	%0 = fmul float %a, %b
 	%1 = fsub float -0.0, %0
@@ -13,7 +13,7 @@ entry:
 }
 
 define float @test2(float %acc, float %a, float %b) nounwind {
-; CHECK: vnmla.f32 s2, s1, s0
+; CHECK: vnmla.f32 s{{.*}}, s{{.*}}, s{{.*}}
 entry:
 	%0 = fmul float %a, %b
 	%1 = fmul float -1.0, %0
diff --git a/test/CodeGen/ARM/lsr-on-unrolled-loops.ll b/test/CodeGen/ARM/lsr-on-unrolled-loops.ll
new file mode 100644
index 0000000..2ac4084
--- /dev/null
+++ b/test/CodeGen/ARM/lsr-on-unrolled-loops.ll
@@ -0,0 +1,642 @@
+; RUN: llc -mtriple=thumbv7-apple-darwin10 -mcpu=cortex-a8 < %s | FileCheck %s
+
+; LSR should recognize that this is an unrolled loop which can use
+; constant offset addressing, so that each of the following stores
+; uses the same register.
+
+; CHECK: vstr.32 s0, [r12, #-128]
+; CHECK: vstr.32 s0, [r12, #-96]
+; CHECK: vstr.32 s0, [r12, #-64]
+; CHECK: vstr.32 s0, [r12, #-32]
+; CHECK: vstr.32 s0, [r12]
+; CHECK: vstr.32 s0, [r12, #32]
+; CHECK: vstr.32 s0, [r12, #64]
+; CHECK: vstr.32 s0, [r12, #96]
+
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32"
+
+%0 = type { %1*, %3*, %6*, i8*, i32, i32, %8*, i32, i32, i32, i32, i32, i32, i32, double, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8**, i32, i32, i32, i32, i32, [64 x i32]*, [4 x %9*], [4 x %10*], [4 x %10*], i32, %11*, i32, i32, [16 x i8], [16 x i8], [16 x i8], i32, i32, i8, i8, i8, i16, i16, i32, i8, i32, %12*, i32, i32, i32, i32, i8*, i32, [4 x %11*], i32, i32, i32, [10 x i32], i32, i32, i32, i32, i32, %13*, %14*, %15*, %16*, %17*, %18*, %19*, %20*, %21*, %22*, %23* }
+%1 = type { void (%2*)*, void (%2*, i32)*, void (%2*)*, void (%2*, i8*)*, void (%2*)*, i32, %7, i32, i32, i8**, i32, i8**, i32, i32 }
+%2 = type { %1*, %3*, %6*, i8*, i32, i32 }
+%3 = type { i8* (%2*, i32, i32)*, i8* (%2*, i32, i32)*, i8** (%2*, i32, i32, i32)*, [64 x i16]** (%2*, i32, i32, i32)*, %4* (%2*, i32, i32, i32, i32, i32)*, %5* (%2*, i32, i32, i32, i32, i32)*, void (%2*)*, i8** (%2*, %4*, i32, i32, i32)*, [64 x i16]** (%2*, %5*, i32, i32, i32)*, void (%2*, i32)*, void (%2*)*, i32, i32 }
+%4 = type opaque
+%5 = type opaque
+%6 = type { void (%2*)*, i32, i32, i32, i32 }
+%7 = type { [8 x i32], [12 x i32] }
+%8 = type { i8*, i32, void (%0*)*, i32 (%0*)*, void (%0*, i32)*, i32 (%0*, i32)*, void (%0*)* }
+%9 = type { [64 x i16], i32 }
+%10 = type { [17 x i8], [256 x i8], i32 }
+%11 = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %9*, i8* }
+%12 = type { %12*, i8, i32, i32, i8* }
+%13 = type { void (%0*)*, void (%0*)*, i32 }
+%14 = type { void (%0*, i32)*, void (%0*, i8**, i32*, i32)* }
+%15 = type { void (%0*)*, i32 (%0*)*, void (%0*)*, i32 (%0*, i8***)*, %5** }
+%16 = type { void (%0*, i32)*, void (%0*, i8***, i32*, i32, i8**, i32*, i32)* }
+%17 = type { i32 (%0*)*, void (%0*)*, void (%0*)*, void (%0*)*, i32, i32 }
+%18 = type { void (%0*)*, i32 (%0*)*, i32 (%0*)*, i32, i32, i32, i32 }
+%19 = type { void (%0*)*, i32 (%0*, [64 x i16]**)*, i32 }
+%20 = type { void (%0*)*, [10 x void (%0*, %11*, i16*, i8**, i32)*] }
+%21 = type { void (%0*)*, void (%0*, i8***, i32*, i32, i8**, i32*, i32)*, i32 }
+%22 = type { void (%0*)*, void (%0*, i8***, i32, i8**, i32)* }
+%23 = type { void (%0*, i32)*, void (%0*, i8**, i8**, i32)*, void (%0*)*, void (%0*)* }
+
+define arm_apcscc void @test(%0* nocapture %a0, %11* nocapture %a1, i16* nocapture %a2, i8** nocapture %a3, i32 %a4) nounwind {
+bb:
+  %t = alloca [64 x float], align 4           
+  %t5 = getelementptr inbounds %0* %a0, i32 0, i32 65
+  %t6 = load i8** %t5, align 4              
+  %t7 = getelementptr inbounds %11* %a1, i32 0, i32 20
+  %t8 = load i8** %t7, align 4              
+  br label %bb9
+
+bb9:                                            
+  %t10 = phi i32 [ 0, %bb ], [ %t157, %bb156 ]
+  %t11 = add i32 %t10, 8                    
+  %t12 = getelementptr [64 x float]* %t, i32 0, i32 %t11
+  %t13 = add i32 %t10, 16                   
+  %t14 = getelementptr [64 x float]* %t, i32 0, i32 %t13
+  %t15 = add i32 %t10, 24                   
+  %t16 = getelementptr [64 x float]* %t, i32 0, i32 %t15
+  %t17 = add i32 %t10, 32                   
+  %t18 = getelementptr [64 x float]* %t, i32 0, i32 %t17
+  %t19 = add i32 %t10, 40                   
+  %t20 = getelementptr [64 x float]* %t, i32 0, i32 %t19
+  %t21 = add i32 %t10, 48                   
+  %t22 = getelementptr [64 x float]* %t, i32 0, i32 %t21
+  %t23 = add i32 %t10, 56                   
+  %t24 = getelementptr [64 x float]* %t, i32 0, i32 %t23
+  %t25 = getelementptr [64 x float]* %t, i32 0, i32 %t10
+  %t26 = shl i32 %t10, 5                    
+  %t27 = or i32 %t26, 8                     
+  %t28 = getelementptr i8* %t8, i32 %t27  
+  %t29 = bitcast i8* %t28 to float*         
+  %t30 = or i32 %t26, 16                    
+  %t31 = getelementptr i8* %t8, i32 %t30  
+  %t32 = bitcast i8* %t31 to float*         
+  %t33 = or i32 %t26, 24                    
+  %t34 = getelementptr i8* %t8, i32 %t33  
+  %t35 = bitcast i8* %t34 to float*         
+  %t36 = or i32 %t26, 4                     
+  %t37 = getelementptr i8* %t8, i32 %t36  
+  %t38 = bitcast i8* %t37 to float*         
+  %t39 = or i32 %t26, 12                    
+  %t40 = getelementptr i8* %t8, i32 %t39  
+  %t41 = bitcast i8* %t40 to float*         
+  %t42 = or i32 %t26, 20                    
+  %t43 = getelementptr i8* %t8, i32 %t42  
+  %t44 = bitcast i8* %t43 to float*         
+  %t45 = or i32 %t26, 28                    
+  %t46 = getelementptr i8* %t8, i32 %t45  
+  %t47 = bitcast i8* %t46 to float*         
+  %t48 = getelementptr i8* %t8, i32 %t26  
+  %t49 = bitcast i8* %t48 to float*         
+  %t50 = shl i32 %t10, 3                    
+  %t51 = or i32 %t50, 1                     
+  %t52 = getelementptr i16* %a2, i32 %t51 
+  %t53 = or i32 %t50, 2                     
+  %t54 = getelementptr i16* %a2, i32 %t53 
+  %t55 = or i32 %t50, 3                     
+  %t56 = getelementptr i16* %a2, i32 %t55 
+  %t57 = or i32 %t50, 4                     
+  %t58 = getelementptr i16* %a2, i32 %t57 
+  %t59 = or i32 %t50, 5                     
+  %t60 = getelementptr i16* %a2, i32 %t59 
+  %t61 = or i32 %t50, 6                     
+  %t62 = getelementptr i16* %a2, i32 %t61 
+  %t63 = or i32 %t50, 7                     
+  %t64 = getelementptr i16* %a2, i32 %t63 
+  %t65 = getelementptr i16* %a2, i32 %t50 
+  %t66 = load i16* %t52, align 2            
+  %t67 = icmp eq i16 %t66, 0                
+  %t68 = load i16* %t54, align 2            
+  %t69 = icmp eq i16 %t68, 0                
+  %t70 = and i1 %t67, %t69                
+  br i1 %t70, label %bb71, label %bb91
+
+bb71:                                           
+  %t72 = load i16* %t56, align 2            
+  %t73 = icmp eq i16 %t72, 0                
+  br i1 %t73, label %bb74, label %bb91
+
+bb74:                                           
+  %t75 = load i16* %t58, align 2            
+  %t76 = icmp eq i16 %t75, 0                
+  br i1 %t76, label %bb77, label %bb91
+
+bb77:                                           
+  %t78 = load i16* %t60, align 2            
+  %t79 = icmp eq i16 %t78, 0                
+  br i1 %t79, label %bb80, label %bb91
+
+bb80:                                           
+  %t81 = load i16* %t62, align 2            
+  %t82 = icmp eq i16 %t81, 0                
+  br i1 %t82, label %bb83, label %bb91
+
+bb83:                                           
+  %t84 = load i16* %t64, align 2            
+  %t85 = icmp eq i16 %t84, 0                
+  br i1 %t85, label %bb86, label %bb91
+
+bb86:                                           
+  %t87 = load i16* %t65, align 2            
+  %t88 = sitofp i16 %t87 to float           
+  %t89 = load float* %t49, align 4          
+  %t90 = fmul float %t88, %t89            
+  store float %t90, float* %t25, align 4
+  store float %t90, float* %t12, align 4
+  store float %t90, float* %t14, align 4
+  store float %t90, float* %t16, align 4
+  store float %t90, float* %t18, align 4
+  store float %t90, float* %t20, align 4
+  store float %t90, float* %t22, align 4
+  store float %t90, float* %t24, align 4
+  br label %bb156
+
+bb91:                                           
+  %t92 = load i16* %t65, align 2            
+  %t93 = sitofp i16 %t92 to float           
+  %t94 = load float* %t49, align 4          
+  %t95 = fmul float %t93, %t94            
+  %t96 = sitofp i16 %t68 to float           
+  %t97 = load float* %t29, align 4          
+  %t98 = fmul float %t96, %t97            
+  %t99 = load i16* %t58, align 2            
+  %t100 = sitofp i16 %t99 to float          
+  %t101 = load float* %t32, align 4         
+  %t102 = fmul float %t100, %t101         
+  %t103 = load i16* %t62, align 2           
+  %t104 = sitofp i16 %t103 to float         
+  %t105 = load float* %t35, align 4         
+  %t106 = fmul float %t104, %t105         
+  %t107 = fadd float %t95, %t102          
+  %t108 = fsub float %t95, %t102          
+  %t109 = fadd float %t98, %t106          
+  %t110 = fsub float %t98, %t106          
+  %t111 = fmul float %t110, 0x3FF6A09E60000000
+  %t112 = fsub float %t111, %t109         
+  %t113 = fadd float %t107, %t109         
+  %t114 = fsub float %t107, %t109         
+  %t115 = fadd float %t108, %t112         
+  %t116 = fsub float %t108, %t112         
+  %t117 = sitofp i16 %t66 to float          
+  %t118 = load float* %t38, align 4         
+  %t119 = fmul float %t117, %t118         
+  %t120 = load i16* %t56, align 2           
+  %t121 = sitofp i16 %t120 to float         
+  %t122 = load float* %t41, align 4         
+  %t123 = fmul float %t121, %t122         
+  %t124 = load i16* %t60, align 2           
+  %t125 = sitofp i16 %t124 to float         
+  %t126 = load float* %t44, align 4         
+  %t127 = fmul float %t125, %t126         
+  %t128 = load i16* %t64, align 2           
+  %t129 = sitofp i16 %t128 to float         
+  %t130 = load float* %t47, align 4         
+  %t131 = fmul float %t129, %t130         
+  %t132 = fadd float %t127, %t123         
+  %t133 = fsub float %t127, %t123         
+  %t134 = fadd float %t119, %t131         
+  %t135 = fsub float %t119, %t131         
+  %t136 = fadd float %t134, %t132         
+  %t137 = fsub float %t134, %t132         
+  %t138 = fmul float %t137, 0x3FF6A09E60000000
+  %t139 = fadd float %t133, %t135         
+  %t140 = fmul float %t139, 0x3FFD906BC0000000
+  %t141 = fmul float %t135, 0x3FF1517A80000000
+  %t142 = fsub float %t141, %t140         
+  %t143 = fmul float %t133, 0xC004E7AEA0000000
+  %t144 = fadd float %t143, %t140         
+  %t145 = fsub float %t144, %t136         
+  %t146 = fsub float %t138, %t145         
+  %t147 = fadd float %t142, %t146         
+  %t148 = fadd float %t113, %t136         
+  store float %t148, float* %t25, align 4
+  %t149 = fsub float %t113, %t136         
+  store float %t149, float* %t24, align 4
+  %t150 = fadd float %t115, %t145         
+  store float %t150, float* %t12, align 4
+  %t151 = fsub float %t115, %t145         
+  store float %t151, float* %t22, align 4
+  %t152 = fadd float %t116, %t146         
+  store float %t152, float* %t14, align 4
+  %t153 = fsub float %t116, %t146         
+  store float %t153, float* %t20, align 4
+  %t154 = fadd float %t114, %t147         
+  store float %t154, float* %t18, align 4
+  %t155 = fsub float %t114, %t147         
+  store float %t155, float* %t16, align 4
+  br label %bb156
+
+bb156:                                          
+  %t157 = add i32 %t10, 1                   
+  %t158 = icmp eq i32 %t157, 8              
+  br i1 %t158, label %bb159, label %bb9
+
+bb159:                                          
+  %t160 = add i32 %a4, 7                    
+  %t161 = add i32 %a4, 1                    
+  %t162 = add i32 %a4, 6                    
+  %t163 = add i32 %a4, 2                    
+  %t164 = add i32 %a4, 5                    
+  %t165 = add i32 %a4, 4                    
+  %t166 = add i32 %a4, 3                    
+  br label %bb167
+
+bb167:                                          
+  %t168 = phi i32 [ 0, %bb159 ], [ %t293, %bb167 ]
+  %t169 = getelementptr i8** %a3, i32 %t168
+  %t170 = shl i32 %t168, 3                  
+  %t171 = or i32 %t170, 4                   
+  %t172 = getelementptr [64 x float]* %t, i32 0, i32 %t171
+  %t173 = or i32 %t170, 2                   
+  %t174 = getelementptr [64 x float]* %t, i32 0, i32 %t173
+  %t175 = or i32 %t170, 6                   
+  %t176 = getelementptr [64 x float]* %t, i32 0, i32 %t175
+  %t177 = or i32 %t170, 5                   
+  %t178 = getelementptr [64 x float]* %t, i32 0, i32 %t177
+  %t179 = or i32 %t170, 3                   
+  %t180 = getelementptr [64 x float]* %t, i32 0, i32 %t179
+  %t181 = or i32 %t170, 1                   
+  %t182 = getelementptr [64 x float]* %t, i32 0, i32 %t181
+  %t183 = or i32 %t170, 7                   
+  %t184 = getelementptr [64 x float]* %t, i32 0, i32 %t183
+  %t185 = getelementptr [64 x float]* %t, i32 0, i32 %t170
+  %t186 = load i8** %t169, align 4          
+  %t187 = getelementptr inbounds i8* %t186, i32 %a4
+  %t188 = load float* %t185, align 4        
+  %t189 = load float* %t172, align 4        
+  %t190 = fadd float %t188, %t189         
+  %t191 = fsub float %t188, %t189         
+  %t192 = load float* %t174, align 4        
+  %t193 = load float* %t176, align 4        
+  %t194 = fadd float %t192, %t193         
+  %t195 = fsub float %t192, %t193         
+  %t196 = fmul float %t195, 0x3FF6A09E60000000
+  %t197 = fsub float %t196, %t194         
+  %t198 = fadd float %t190, %t194         
+  %t199 = fsub float %t190, %t194         
+  %t200 = fadd float %t191, %t197         
+  %t201 = fsub float %t191, %t197         
+  %t202 = load float* %t178, align 4        
+  %t203 = load float* %t180, align 4        
+  %t204 = fadd float %t202, %t203         
+  %t205 = fsub float %t202, %t203         
+  %t206 = load float* %t182, align 4        
+  %t207 = load float* %t184, align 4        
+  %t208 = fadd float %t206, %t207         
+  %t209 = fsub float %t206, %t207         
+  %t210 = fadd float %t208, %t204         
+  %t211 = fsub float %t208, %t204         
+  %t212 = fmul float %t211, 0x3FF6A09E60000000
+  %t213 = fadd float %t205, %t209         
+  %t214 = fmul float %t213, 0x3FFD906BC0000000
+  %t215 = fmul float %t209, 0x3FF1517A80000000
+  %t216 = fsub float %t215, %t214         
+  %t217 = fmul float %t205, 0xC004E7AEA0000000
+  %t218 = fadd float %t217, %t214         
+  %t219 = fsub float %t218, %t210         
+  %t220 = fsub float %t212, %t219         
+  %t221 = fadd float %t216, %t220         
+  %t222 = fadd float %t198, %t210         
+  %t223 = fptosi float %t222 to i32         
+  %t224 = add nsw i32 %t223, 4              
+  %t225 = lshr i32 %t224, 3                 
+  %t226 = and i32 %t225, 1023               
+  %t227 = add i32 %t226, 128                
+  %t228 = getelementptr inbounds i8* %t6, i32 %t227
+  %t229 = load i8* %t228, align 1           
+  store i8 %t229, i8* %t187, align 1
+  %t230 = fsub float %t198, %t210         
+  %t231 = fptosi float %t230 to i32         
+  %t232 = add nsw i32 %t231, 4              
+  %t233 = lshr i32 %t232, 3                 
+  %t234 = and i32 %t233, 1023               
+  %t235 = add i32 %t234, 128                
+  %t236 = getelementptr inbounds i8* %t6, i32 %t235
+  %t237 = load i8* %t236, align 1           
+  %t238 = getelementptr inbounds i8* %t186, i32 %t160
+  store i8 %t237, i8* %t238, align 1
+  %t239 = fadd float %t200, %t219         
+  %t240 = fptosi float %t239 to i32         
+  %t241 = add nsw i32 %t240, 4              
+  %t242 = lshr i32 %t241, 3                 
+  %t243 = and i32 %t242, 1023               
+  %t244 = add i32 %t243, 128                
+  %t245 = getelementptr inbounds i8* %t6, i32 %t244
+  %t246 = load i8* %t245, align 1           
+  %t247 = getelementptr inbounds i8* %t186, i32 %t161
+  store i8 %t246, i8* %t247, align 1
+  %t248 = fsub float %t200, %t219         
+  %t249 = fptosi float %t248 to i32         
+  %t250 = add nsw i32 %t249, 4              
+  %t251 = lshr i32 %t250, 3                 
+  %t252 = and i32 %t251, 1023               
+  %t253 = add i32 %t252, 128                
+  %t254 = getelementptr inbounds i8* %t6, i32 %t253
+  %t255 = load i8* %t254, align 1           
+  %t256 = getelementptr inbounds i8* %t186, i32 %t162
+  store i8 %t255, i8* %t256, align 1
+  %t257 = fadd float %t201, %t220         
+  %t258 = fptosi float %t257 to i32         
+  %t259 = add nsw i32 %t258, 4              
+  %t260 = lshr i32 %t259, 3                 
+  %t261 = and i32 %t260, 1023               
+  %t262 = add i32 %t261, 128                
+  %t263 = getelementptr inbounds i8* %t6, i32 %t262
+  %t264 = load i8* %t263, align 1           
+  %t265 = getelementptr inbounds i8* %t186, i32 %t163
+  store i8 %t264, i8* %t265, align 1
+  %t266 = fsub float %t201, %t220         
+  %t267 = fptosi float %t266 to i32         
+  %t268 = add nsw i32 %t267, 4              
+  %t269 = lshr i32 %t268, 3                 
+  %t270 = and i32 %t269, 1023               
+  %t271 = add i32 %t270, 128                
+  %t272 = getelementptr inbounds i8* %t6, i32 %t271
+  %t273 = load i8* %t272, align 1           
+  %t274 = getelementptr inbounds i8* %t186, i32 %t164
+  store i8 %t273, i8* %t274, align 1
+  %t275 = fadd float %t199, %t221         
+  %t276 = fptosi float %t275 to i32         
+  %t277 = add nsw i32 %t276, 4              
+  %t278 = lshr i32 %t277, 3                 
+  %t279 = and i32 %t278, 1023               
+  %t280 = add i32 %t279, 128                
+  %t281 = getelementptr inbounds i8* %t6, i32 %t280
+  %t282 = load i8* %t281, align 1           
+  %t283 = getelementptr inbounds i8* %t186, i32 %t165
+  store i8 %t282, i8* %t283, align 1
+  %t284 = fsub float %t199, %t221         
+  %t285 = fptosi float %t284 to i32         
+  %t286 = add nsw i32 %t285, 4              
+  %t287 = lshr i32 %t286, 3                 
+  %t288 = and i32 %t287, 1023               
+  %t289 = add i32 %t288, 128                
+  %t290 = getelementptr inbounds i8* %t6, i32 %t289
+  %t291 = load i8* %t290, align 1           
+  %t292 = getelementptr inbounds i8* %t186, i32 %t166
+  store i8 %t291, i8* %t292, align 1
+  %t293 = add nsw i32 %t168, 1              
+  %t294 = icmp eq i32 %t293, 8              
+  br i1 %t294, label %bb295, label %bb167
+
+bb295:                                          
+  ret void
+}
+
+%struct.ct_data_s = type { %union.anon, %union.anon }
+%struct.gz_header = type { i32, i32, i32, i32, i8*, i32, i32, i8*, i32, i8*, i32, i32, i32 }
+%struct.internal_state = type { %struct.z_stream*, i32, i8*, i32, i8*, i32, i32, %struct.gz_header*, i32, i8, i32, i32, i32, i32, i8*, i32, i16*, i16*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [573 x %struct.ct_data_s], [61 x %struct.ct_data_s], [39 x %struct.ct_data_s], %struct.tree_desc_s, %struct.tree_desc_s, %struct.tree_desc_s, [16 x i16], [573 x i32], i32, i32, [573 x i8], i8*, i32, i32, i16*, i32, i32, i32, i32, i16, i32 }
+%struct.static_tree_desc = type { i32 }
+%struct.tree_desc_s = type { %struct.ct_data_s*, i32, %struct.static_tree_desc* }
+%struct.z_stream = type { i8*, i32, i32, i8*, i32, i32, i8*, %struct.internal_state*, i8* (i8*, i32, i32)*, void (i8*, i8*)*, i8*, i32, i32, i32 }
+%union.anon = type { i16 }
+
+define arm_apcscc i32 @longest_match(%struct.internal_state* %s, i32 %cur_match) nounwind optsize {
+entry:
+  %0 = getelementptr inbounds %struct.internal_state* %s, i32 0, i32 31 ; <i32*> [#uses=1]
+  %1 = load i32* %0, align 4                      ; <i32> [#uses=2]
+  %2 = getelementptr inbounds %struct.internal_state* %s, i32 0, i32 14 ; <i8**> [#uses=1]
+  %3 = load i8** %2, align 4                      ; <i8*> [#uses=27]
+  %4 = getelementptr inbounds %struct.internal_state* %s, i32 0, i32 27 ; <i32*> [#uses=1]
+  %5 = load i32* %4, align 4                      ; <i32> [#uses=17]
+  %6 = getelementptr inbounds i8* %3, i32 %5      ; <i8*> [#uses=1]
+  %7 = getelementptr inbounds %struct.internal_state* %s, i32 0, i32 30 ; <i32*> [#uses=1]
+  %8 = load i32* %7, align 4                      ; <i32> [#uses=4]
+  %9 = getelementptr inbounds %struct.internal_state* %s, i32 0, i32 36 ; <i32*> [#uses=1]
+  %10 = load i32* %9, align 4                     ; <i32> [#uses=2]
+  %11 = getelementptr inbounds %struct.internal_state* %s, i32 0, i32 11 ; <i32*> [#uses=1]
+  %12 = load i32* %11, align 4                    ; <i32> [#uses=2]
+  %13 = add i32 %12, -262                         ; <i32> [#uses=1]
+  %14 = icmp ugt i32 %5, %13                      ; <i1> [#uses=1]
+  br i1 %14, label %bb, label %bb2
+
+bb:                                               ; preds = %entry
+  %15 = add i32 %5, 262                           ; <i32> [#uses=1]
+  %16 = sub i32 %15, %12                          ; <i32> [#uses=1]
+  br label %bb2
+
+bb2:                                              ; preds = %bb, %entry
+  %iftmp.48.0 = phi i32 [ %16, %bb ], [ 0, %entry ] ; <i32> [#uses=1]
+  %17 = getelementptr inbounds %struct.internal_state* %s, i32 0, i32 16 ; <i16**> [#uses=1]
+  %18 = load i16** %17, align 4                   ; <i16*> [#uses=1]
+  %19 = getelementptr inbounds %struct.internal_state* %s, i32 0, i32 13 ; <i32*> [#uses=1]
+  %20 = load i32* %19, align 4                    ; <i32> [#uses=1]
+  %.sum = add i32 %5, 258                         ; <i32> [#uses=2]
+  %21 = getelementptr inbounds i8* %3, i32 %.sum  ; <i8*> [#uses=1]
+  %22 = add nsw i32 %5, -1                        ; <i32> [#uses=1]
+  %.sum30 = add i32 %22, %8                       ; <i32> [#uses=1]
+  %23 = getelementptr inbounds i8* %3, i32 %.sum30 ; <i8*> [#uses=1]
+  %24 = load i8* %23, align 1                     ; <i8> [#uses=1]
+  %.sum31 = add i32 %8, %5                        ; <i32> [#uses=1]
+  %25 = getelementptr inbounds i8* %3, i32 %.sum31 ; <i8*> [#uses=1]
+  %26 = load i8* %25, align 1                     ; <i8> [#uses=1]
+  %27 = getelementptr inbounds %struct.internal_state* %s, i32 0, i32 35 ; <i32*> [#uses=1]
+  %28 = load i32* %27, align 4                    ; <i32> [#uses=1]
+  %29 = lshr i32 %1, 2                            ; <i32> [#uses=1]
+  %30 = icmp ult i32 %8, %28                      ; <i1> [#uses=1]
+  %. = select i1 %30, i32 %1, i32 %29             ; <i32> [#uses=1]
+  %31 = getelementptr inbounds %struct.internal_state* %s, i32 0, i32 29 ; <i32*> [#uses=1]
+  %32 = load i32* %31, align 4                    ; <i32> [#uses=4]
+  %33 = icmp ugt i32 %10, %32                     ; <i1> [#uses=1]
+  %nice_match.0.ph = select i1 %33, i32 %32, i32 %10 ; <i32> [#uses=1]
+  %34 = getelementptr inbounds %struct.internal_state* %s, i32 0, i32 28 ; <i32*> [#uses=1]
+  %35 = ptrtoint i8* %21 to i32                   ; <i32> [#uses=1]
+  %36 = add nsw i32 %5, 257                       ; <i32> [#uses=1]
+  %tmp81 = add i32 %., -1                         ; <i32> [#uses=1]
+  br label %bb6
+
+bb6:                                              ; preds = %bb24, %bb2
+  %indvar78 = phi i32 [ 0, %bb2 ], [ %indvar.next79, %bb24 ] ; <i32> [#uses=2]
+  %best_len.2 = phi i32 [ %8, %bb2 ], [ %best_len.0, %bb24 ] ; <i32> [#uses=8]
+  %scan_end1.1 = phi i8 [ %24, %bb2 ], [ %scan_end1.0, %bb24 ] ; <i8> [#uses=6]
+  %cur_match_addr.0 = phi i32 [ %cur_match, %bb2 ], [ %90, %bb24 ] ; <i32> [#uses=14]
+  %scan_end.1 = phi i8 [ %26, %bb2 ], [ %scan_end.0, %bb24 ] ; <i8> [#uses=6]
+  %37 = getelementptr inbounds i8* %3, i32 %cur_match_addr.0 ; <i8*> [#uses=1]
+  %.sum32 = add i32 %cur_match_addr.0, %best_len.2 ; <i32> [#uses=1]
+  %38 = getelementptr inbounds i8* %3, i32 %.sum32 ; <i8*> [#uses=1]
+  %39 = load i8* %38, align 1                     ; <i8> [#uses=1]
+  %40 = icmp eq i8 %39, %scan_end.1               ; <i1> [#uses=1]
+  br i1 %40, label %bb7, label %bb23
+
+bb7:                                              ; preds = %bb6
+  %41 = add nsw i32 %best_len.2, -1               ; <i32> [#uses=1]
+  %.sum33 = add i32 %41, %cur_match_addr.0        ; <i32> [#uses=1]
+  %42 = getelementptr inbounds i8* %3, i32 %.sum33 ; <i8*> [#uses=1]
+  %43 = load i8* %42, align 1                     ; <i8> [#uses=1]
+  %44 = icmp eq i8 %43, %scan_end1.1              ; <i1> [#uses=1]
+  br i1 %44, label %bb8, label %bb23
+
+bb8:                                              ; preds = %bb7
+  %45 = load i8* %37, align 1                     ; <i8> [#uses=1]
+  %46 = load i8* %6, align 1                      ; <i8> [#uses=1]
+  %47 = icmp eq i8 %45, %46                       ; <i1> [#uses=1]
+  br i1 %47, label %bb9, label %bb23
+
+bb9:                                              ; preds = %bb8
+  %.sum34 = add i32 %cur_match_addr.0, 1          ; <i32> [#uses=1]
+  %48 = getelementptr inbounds i8* %3, i32 %.sum34 ; <i8*> [#uses=1]
+  %49 = load i8* %48, align 1                     ; <i8> [#uses=1]
+  %.sum88 = add i32 %5, 1                         ; <i32> [#uses=1]
+  %50 = getelementptr inbounds i8* %3, i32 %.sum88 ; <i8*> [#uses=1]
+  %51 = load i8* %50, align 1                     ; <i8> [#uses=1]
+  %52 = icmp eq i8 %49, %51                       ; <i1> [#uses=1]
+  br i1 %52, label %bb10, label %bb23
+
+bb10:                                             ; preds = %bb9
+  %tmp39 = add i32 %cur_match_addr.0, 10          ; <i32> [#uses=1]
+  %tmp41 = add i32 %cur_match_addr.0, 9           ; <i32> [#uses=1]
+  %tmp44 = add i32 %cur_match_addr.0, 8           ; <i32> [#uses=1]
+  %tmp47 = add i32 %cur_match_addr.0, 7           ; <i32> [#uses=1]
+  %tmp50 = add i32 %cur_match_addr.0, 6           ; <i32> [#uses=1]
+  %tmp53 = add i32 %cur_match_addr.0, 5           ; <i32> [#uses=1]
+  %tmp56 = add i32 %cur_match_addr.0, 4           ; <i32> [#uses=1]
+  %tmp59 = add i32 %cur_match_addr.0, 3           ; <i32> [#uses=1]
+  br label %bb11
+
+bb11:                                             ; preds = %bb18, %bb10
+  %indvar = phi i32 [ %indvar.next, %bb18 ], [ 0, %bb10 ] ; <i32> [#uses=2]
+  %tmp = shl i32 %indvar, 3                       ; <i32> [#uses=16]
+  %tmp40 = add i32 %tmp39, %tmp                   ; <i32> [#uses=1]
+  %scevgep = getelementptr i8* %3, i32 %tmp40     ; <i8*> [#uses=1]
+  %tmp42 = add i32 %tmp41, %tmp                   ; <i32> [#uses=1]
+  %scevgep43 = getelementptr i8* %3, i32 %tmp42   ; <i8*> [#uses=1]
+  %tmp45 = add i32 %tmp44, %tmp                   ; <i32> [#uses=1]
+  %scevgep46 = getelementptr i8* %3, i32 %tmp45   ; <i8*> [#uses=1]
+  %tmp48 = add i32 %tmp47, %tmp                   ; <i32> [#uses=1]
+  %scevgep49 = getelementptr i8* %3, i32 %tmp48   ; <i8*> [#uses=1]
+  %tmp51 = add i32 %tmp50, %tmp                   ; <i32> [#uses=1]
+  %scevgep52 = getelementptr i8* %3, i32 %tmp51   ; <i8*> [#uses=1]
+  %tmp54 = add i32 %tmp53, %tmp                   ; <i32> [#uses=1]
+  %scevgep55 = getelementptr i8* %3, i32 %tmp54   ; <i8*> [#uses=1]
+  %tmp60 = add i32 %tmp59, %tmp                   ; <i32> [#uses=1]
+  %scevgep61 = getelementptr i8* %3, i32 %tmp60   ; <i8*> [#uses=1]
+  %tmp62 = add i32 %tmp, 10                       ; <i32> [#uses=1]
+  %.sum89 = add i32 %5, %tmp62                    ; <i32> [#uses=2]
+  %scevgep63 = getelementptr i8* %3, i32 %.sum89  ; <i8*> [#uses=2]
+  %tmp64 = add i32 %tmp, 9                        ; <i32> [#uses=1]
+  %.sum90 = add i32 %5, %tmp64                    ; <i32> [#uses=1]
+  %scevgep65 = getelementptr i8* %3, i32 %.sum90  ; <i8*> [#uses=2]
+  %tmp66 = add i32 %tmp, 8                        ; <i32> [#uses=1]
+  %.sum91 = add i32 %5, %tmp66                    ; <i32> [#uses=1]
+  %scevgep67 = getelementptr i8* %3, i32 %.sum91  ; <i8*> [#uses=2]
+  %tmp6883 = or i32 %tmp, 7                       ; <i32> [#uses=1]
+  %.sum92 = add i32 %5, %tmp6883                  ; <i32> [#uses=1]
+  %scevgep69 = getelementptr i8* %3, i32 %.sum92  ; <i8*> [#uses=2]
+  %tmp7084 = or i32 %tmp, 6                       ; <i32> [#uses=1]
+  %.sum93 = add i32 %5, %tmp7084                  ; <i32> [#uses=1]
+  %scevgep71 = getelementptr i8* %3, i32 %.sum93  ; <i8*> [#uses=2]
+  %tmp7285 = or i32 %tmp, 5                       ; <i32> [#uses=1]
+  %.sum94 = add i32 %5, %tmp7285                  ; <i32> [#uses=1]
+  %scevgep73 = getelementptr i8* %3, i32 %.sum94  ; <i8*> [#uses=2]
+  %tmp7486 = or i32 %tmp, 4                       ; <i32> [#uses=1]
+  %.sum95 = add i32 %5, %tmp7486                  ; <i32> [#uses=1]
+  %scevgep75 = getelementptr i8* %3, i32 %.sum95  ; <i8*> [#uses=2]
+  %tmp7687 = or i32 %tmp, 3                       ; <i32> [#uses=1]
+  %.sum96 = add i32 %5, %tmp7687                  ; <i32> [#uses=1]
+  %scevgep77 = getelementptr i8* %3, i32 %.sum96  ; <i8*> [#uses=2]
+  %53 = load i8* %scevgep77, align 1              ; <i8> [#uses=1]
+  %54 = load i8* %scevgep61, align 1              ; <i8> [#uses=1]
+  %55 = icmp eq i8 %53, %54                       ; <i1> [#uses=1]
+  br i1 %55, label %bb12, label %bb20
+
+bb12:                                             ; preds = %bb11
+  %tmp57 = add i32 %tmp56, %tmp                   ; <i32> [#uses=1]
+  %scevgep58 = getelementptr i8* %3, i32 %tmp57   ; <i8*> [#uses=1]
+  %56 = load i8* %scevgep75, align 1              ; <i8> [#uses=1]
+  %57 = load i8* %scevgep58, align 1              ; <i8> [#uses=1]
+  %58 = icmp eq i8 %56, %57                       ; <i1> [#uses=1]
+  br i1 %58, label %bb13, label %bb20
+
+bb13:                                             ; preds = %bb12
+  %59 = load i8* %scevgep73, align 1              ; <i8> [#uses=1]
+  %60 = load i8* %scevgep55, align 1              ; <i8> [#uses=1]
+  %61 = icmp eq i8 %59, %60                       ; <i1> [#uses=1]
+  br i1 %61, label %bb14, label %bb20
+
+bb14:                                             ; preds = %bb13
+  %62 = load i8* %scevgep71, align 1              ; <i8> [#uses=1]
+  %63 = load i8* %scevgep52, align 1              ; <i8> [#uses=1]
+  %64 = icmp eq i8 %62, %63                       ; <i1> [#uses=1]
+  br i1 %64, label %bb15, label %bb20
+
+bb15:                                             ; preds = %bb14
+  %65 = load i8* %scevgep69, align 1              ; <i8> [#uses=1]
+  %66 = load i8* %scevgep49, align 1              ; <i8> [#uses=1]
+  %67 = icmp eq i8 %65, %66                       ; <i1> [#uses=1]
+  br i1 %67, label %bb16, label %bb20
+
+bb16:                                             ; preds = %bb15
+  %68 = load i8* %scevgep67, align 1              ; <i8> [#uses=1]
+  %69 = load i8* %scevgep46, align 1              ; <i8> [#uses=1]
+  %70 = icmp eq i8 %68, %69                       ; <i1> [#uses=1]
+  br i1 %70, label %bb17, label %bb20
+
+bb17:                                             ; preds = %bb16
+  %71 = load i8* %scevgep65, align 1              ; <i8> [#uses=1]
+  %72 = load i8* %scevgep43, align 1              ; <i8> [#uses=1]
+  %73 = icmp eq i8 %71, %72                       ; <i1> [#uses=1]
+  br i1 %73, label %bb18, label %bb20
+
+bb18:                                             ; preds = %bb17
+  %74 = load i8* %scevgep63, align 1              ; <i8> [#uses=1]
+  %75 = load i8* %scevgep, align 1                ; <i8> [#uses=1]
+  %76 = icmp eq i8 %74, %75                       ; <i1> [#uses=1]
+  %77 = icmp slt i32 %.sum89, %.sum               ; <i1> [#uses=1]
+  %or.cond = and i1 %76, %77                      ; <i1> [#uses=1]
+  %indvar.next = add i32 %indvar, 1               ; <i32> [#uses=1]
+  br i1 %or.cond, label %bb11, label %bb20
+
+bb20:                                             ; preds = %bb18, %bb17, %bb16, %bb15, %bb14, %bb13, %bb12, %bb11
+  %scan.3 = phi i8* [ %scevgep77, %bb11 ], [ %scevgep75, %bb12 ], [ %scevgep73, %bb13 ], [ %scevgep71, %bb14 ], [ %scevgep69, %bb15 ], [ %scevgep67, %bb16 ], [ %scevgep65, %bb17 ], [ %scevgep63, %bb18 ] ; <i8*> [#uses=1]
+  %78 = ptrtoint i8* %scan.3 to i32               ; <i32> [#uses=1]
+  %79 = sub nsw i32 %78, %35                      ; <i32> [#uses=2]
+  %80 = add i32 %79, 258                          ; <i32> [#uses=5]
+  %81 = icmp sgt i32 %80, %best_len.2             ; <i1> [#uses=1]
+  br i1 %81, label %bb21, label %bb23
+
+bb21:                                             ; preds = %bb20
+  store i32 %cur_match_addr.0, i32* %34, align 4
+  %82 = icmp slt i32 %80, %nice_match.0.ph        ; <i1> [#uses=1]
+  br i1 %82, label %bb22, label %bb25
+
+bb22:                                             ; preds = %bb21
+  %.sum37 = add i32 %36, %79                      ; <i32> [#uses=1]
+  %83 = getelementptr inbounds i8* %3, i32 %.sum37 ; <i8*> [#uses=1]
+  %84 = load i8* %83, align 1                     ; <i8> [#uses=1]
+  %.sum38 = add i32 %80, %5                       ; <i32> [#uses=1]
+  %85 = getelementptr inbounds i8* %3, i32 %.sum38 ; <i8*> [#uses=1]
+  %86 = load i8* %85, align 1                     ; <i8> [#uses=1]
+  br label %bb23
+
+bb23:                                             ; preds = %bb22, %bb20, %bb9, %bb8, %bb7, %bb6
+  %best_len.0 = phi i32 [ %best_len.2, %bb6 ], [ %best_len.2, %bb7 ], [ %best_len.2, %bb8 ], [ %best_len.2, %bb9 ], [ %80, %bb22 ], [ %best_len.2, %bb20 ] ; <i32> [#uses=3]
+  %scan_end1.0 = phi i8 [ %scan_end1.1, %bb6 ], [ %scan_end1.1, %bb7 ], [ %scan_end1.1, %bb8 ], [ %scan_end1.1, %bb9 ], [ %84, %bb22 ], [ %scan_end1.1, %bb20 ] ; <i8> [#uses=1]
+  %scan_end.0 = phi i8 [ %scan_end.1, %bb6 ], [ %scan_end.1, %bb7 ], [ %scan_end.1, %bb8 ], [ %scan_end.1, %bb9 ], [ %86, %bb22 ], [ %scan_end.1, %bb20 ] ; <i8> [#uses=1]
+  %87 = and i32 %cur_match_addr.0, %20            ; <i32> [#uses=1]
+  %88 = getelementptr inbounds i16* %18, i32 %87  ; <i16*> [#uses=1]
+  %89 = load i16* %88, align 2                    ; <i16> [#uses=1]
+  %90 = zext i16 %89 to i32                       ; <i32> [#uses=2]
+  %91 = icmp ugt i32 %90, %iftmp.48.0             ; <i1> [#uses=1]
+  br i1 %91, label %bb24, label %bb25
+
+bb24:                                             ; preds = %bb23
+
+; LSR should use count-down iteration to avoid requiring the trip count
+; in a register, and it shouldn't require any reloads here.
+
+; CHECK:      sub.w   r9, r9, #1
+; CHECK-NEXT: cmp.w   r9, #0
+; CHECK-NEXT: bne.w   
+
+  %92 = icmp eq i32 %tmp81, %indvar78             ; <i1> [#uses=1]
+  %indvar.next79 = add i32 %indvar78, 1           ; <i32> [#uses=1]
+  br i1 %92, label %bb25, label %bb6
+
+bb25:                                             ; preds = %bb24, %bb23, %bb21
+  %best_len.1 = phi i32 [ %best_len.0, %bb23 ], [ %best_len.0, %bb24 ], [ %80, %bb21 ] ; <i32> [#uses=2]
+  %93 = icmp ugt i32 %best_len.1, %32             ; <i1> [#uses=1]
+  %merge = select i1 %93, i32 %32, i32 %best_len.1 ; <i32> [#uses=1]
+  ret i32 %merge
+}
diff --git a/test/CodeGen/ARM/mul_const.ll b/test/CodeGen/ARM/mul_const.ll
index 93188cd..8c10246 100644
--- a/test/CodeGen/ARM/mul_const.ll
+++ b/test/CodeGen/ARM/mul_const.ll
@@ -1,17 +1,43 @@
 ; RUN: llc < %s -march=arm | FileCheck %s
 
-define i32 @t1(i32 %v) nounwind readnone {
+define i32 @t9(i32 %v) nounwind readnone {
 entry:
-; CHECK: t1:
+; CHECK: t9:
 ; CHECK: add r0, r0, r0, lsl #3
 	%0 = mul i32 %v, 9
 	ret i32 %0
 }
 
-define i32 @t2(i32 %v) nounwind readnone {
+define i32 @t7(i32 %v) nounwind readnone {
 entry:
-; CHECK: t2:
+; CHECK: t7:
 ; CHECK: rsb r0, r0, r0, lsl #3
 	%0 = mul i32 %v, 7
 	ret i32 %0
 }
+
+define i32 @t5(i32 %v) nounwind readnone {
+entry:
+; CHECK: t5:
+; CHECK: add r0, r0, r0, lsl #2
+        %0 = mul i32 %v, 5
+        ret i32 %0
+}
+
+define i32 @t3(i32 %v) nounwind readnone {
+entry:
+; CHECK: t3:
+; CHECK: add r0, r0, r0, lsl #1
+        %0 = mul i32 %v, 3
+        ret i32 %0
+}
+
+define i32 @t12288(i32 %v) nounwind readnone {
+entry:
+; CHECK: t12288:
+; CHECK: add r0, r0, r0, lsl #1
+; CHECK: mov     r0, r0, lsl #12
+        %0 = mul i32 %v, 12288
+        ret i32 %0
+}
+
diff --git a/test/CodeGen/ARM/reg_sequence.ll b/test/CodeGen/ARM/reg_sequence.ll
new file mode 100644
index 0000000..3ba82cc
--- /dev/null
+++ b/test/CodeGen/ARM/reg_sequence.ll
@@ -0,0 +1,348 @@
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s
+; Implementing vld / vst as REG_SEQUENCE eliminates the extra vmov's.
+
+%struct.int16x8_t = type { <8 x i16> }
+%struct.int32x4_t = type { <4 x i32> }
+%struct.__neon_int8x8x2_t = type { <8 x i8>, <8 x i8> }
+%struct.__neon_int8x8x3_t = type { <8 x i8>,  <8 x i8>,  <8 x i8> }
+%struct.__neon_int16x8x2_t = type { <8 x i16>, <8 x i16> }
+%struct.__neon_int32x4x2_t = type { <4 x i32>, <4 x i32> }
+
+define arm_apcscc void @t1(i16* %i_ptr, i16* %o_ptr, %struct.int32x4_t* nocapture %vT0ptr, %struct.int32x4_t* nocapture %vT1ptr) nounwind {
+entry:
+; CHECK:        t1:
+; CHECK:        vld1.16
+; CHECK-NOT:    vmov d
+; CHECK:        vmovl.s16
+; CHECK:        vshrn.i32
+; CHECK:        vshrn.i32
+; CHECK-NOT:    vmov d
+; CHECK-NEXT:   vst1.16
+  %0 = getelementptr inbounds %struct.int32x4_t* %vT0ptr, i32 0, i32 0 ; <<4 x i32>*> [#uses=1]
+  %1 = load <4 x i32>* %0, align 16               ; <<4 x i32>> [#uses=1]
+  %2 = getelementptr inbounds %struct.int32x4_t* %vT1ptr, i32 0, i32 0 ; <<4 x i32>*> [#uses=1]
+  %3 = load <4 x i32>* %2, align 16               ; <<4 x i32>> [#uses=1]
+  %4 = bitcast i16* %i_ptr to i8*                 ; <i8*> [#uses=1]
+  %5 = tail call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %4) ; <<8 x i16>> [#uses=1]
+  %6 = bitcast <8 x i16> %5 to <2 x double>       ; <<2 x double>> [#uses=2]
+  %7 = extractelement <2 x double> %6, i32 0      ; <double> [#uses=1]
+  %8 = bitcast double %7 to <4 x i16>             ; <<4 x i16>> [#uses=1]
+  %9 = tail call <4 x i32> @llvm.arm.neon.vmovls.v4i32(<4 x i16> %8) ; <<4 x i32>> [#uses=1]
+  %10 = extractelement <2 x double> %6, i32 1     ; <double> [#uses=1]
+  %11 = bitcast double %10 to <4 x i16>           ; <<4 x i16>> [#uses=1]
+  %12 = tail call <4 x i32> @llvm.arm.neon.vmovls.v4i32(<4 x i16> %11) ; <<4 x i32>> [#uses=1]
+  %13 = mul <4 x i32> %1, %9                      ; <<4 x i32>> [#uses=1]
+  %14 = mul <4 x i32> %3, %12                     ; <<4 x i32>> [#uses=1]
+  %15 = tail call <4 x i16> @llvm.arm.neon.vshiftn.v4i16(<4 x i32> %13, <4 x i32> <i32 -12, i32 -12, i32 -12, i32 -12>) ; <<4 x i16>> [#uses=1]
+  %16 = tail call <4 x i16> @llvm.arm.neon.vshiftn.v4i16(<4 x i32> %14, <4 x i32> <i32 -12, i32 -12, i32 -12, i32 -12>) ; <<4 x i16>> [#uses=1]
+  %17 = shufflevector <4 x i16> %15, <4 x i16> %16, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; <<8 x i16>> [#uses=1]
+  %18 = bitcast i16* %o_ptr to i8*                ; <i8*> [#uses=1]
+  tail call void @llvm.arm.neon.vst1.v8i16(i8* %18, <8 x i16> %17)
+  ret void
+}
+
+define arm_apcscc void @t2(i16* %i_ptr, i16* %o_ptr, %struct.int16x8_t* nocapture %vT0ptr, %struct.int16x8_t* nocapture %vT1ptr) nounwind {
+entry:
+; CHECK:        t2:
+; CHECK:        vld1.16
+; CHECK:        vld1.16
+; CHECK-NOT:    vmov
+; CHECK:        vmul.i16
+; CHECK:        vmul.i16
+; CHECK-NOT:    vmov
+; CHECK:        vst1.16
+; CHECK:        vst1.16
+  %0 = getelementptr inbounds %struct.int16x8_t* %vT0ptr, i32 0, i32 0 ; <<8 x i16>*> [#uses=1]
+  %1 = load <8 x i16>* %0, align 16               ; <<8 x i16>> [#uses=1]
+  %2 = getelementptr inbounds %struct.int16x8_t* %vT1ptr, i32 0, i32 0 ; <<8 x i16>*> [#uses=1]
+  %3 = load <8 x i16>* %2, align 16               ; <<8 x i16>> [#uses=1]
+  %4 = bitcast i16* %i_ptr to i8*                 ; <i8*> [#uses=1]
+  %5 = tail call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %4) ; <<8 x i16>> [#uses=1]
+  %6 = getelementptr inbounds i16* %i_ptr, i32 8  ; <i16*> [#uses=1]
+  %7 = bitcast i16* %6 to i8*                     ; <i8*> [#uses=1]
+  %8 = tail call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %7) ; <<8 x i16>> [#uses=1]
+  %9 = mul <8 x i16> %1, %5                       ; <<8 x i16>> [#uses=1]
+  %10 = mul <8 x i16> %3, %8                      ; <<8 x i16>> [#uses=1]
+  %11 = bitcast i16* %o_ptr to i8*                ; <i8*> [#uses=1]
+  tail call void @llvm.arm.neon.vst1.v8i16(i8* %11, <8 x i16> %9)
+  %12 = getelementptr inbounds i16* %o_ptr, i32 8 ; <i16*> [#uses=1]
+  %13 = bitcast i16* %12 to i8*                   ; <i8*> [#uses=1]
+  tail call void @llvm.arm.neon.vst1.v8i16(i8* %13, <8 x i16> %10)
+  ret void
+}
+
+define <8 x i8> @t3(i8* %A, i8* %B) nounwind {
+; CHECK:        t3:
+; CHECK:        vld3.8
+; CHECK:        vmul.i8
+; CHECK-NOT:    vmov
+; CHECK:        vst3.8
+  %tmp1 = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A) ; <%struct.__neon_int8x8x3_t> [#uses=2]
+  %tmp2 = extractvalue %struct.__neon_int8x8x3_t %tmp1, 0 ; <<8 x i8>> [#uses=1]
+  %tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp1, 2 ; <<8 x i8>> [#uses=1]
+  %tmp4 = extractvalue %struct.__neon_int8x8x3_t %tmp1, 1 ; <<8 x i8>> [#uses=1]
+  %tmp5 = sub <8 x i8> %tmp3, %tmp4
+  %tmp6 = add <8 x i8> %tmp2, %tmp3               ; <<8 x i8>> [#uses=1]
+  %tmp7 = mul <8 x i8> %tmp4, %tmp2
+  tail call void @llvm.arm.neon.vst3.v8i8(i8* %B, <8 x i8> %tmp5, <8 x i8> %tmp6, <8 x i8> %tmp7)
+  ret <8 x i8> %tmp4
+}
+
+define arm_apcscc void @t4(i32* %in, i32* %out) nounwind {
+entry:
+; CHECK:        t4:
+; CHECK:        vld2.32
+; CHECK-NOT:    vmov
+; CHECK:        vld2.32
+; CHECK-NOT:    vmov
+; CHECK:        bne
+  %tmp1 = bitcast i32* %in to i8*                 ; <i8*> [#uses=1]
+  %tmp2 = tail call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8* %tmp1) ; <%struct.__neon_int32x4x2_t> [#uses=2]
+  %tmp3 = getelementptr inbounds i32* %in, i32 8  ; <i32*> [#uses=1]
+  %tmp4 = bitcast i32* %tmp3 to i8*               ; <i8*> [#uses=1]
+  %tmp5 = tail call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8* %tmp4) ; <%struct.__neon_int32x4x2_t> [#uses=2]
+  %tmp8 = bitcast i32* %out to i8*                ; <i8*> [#uses=1]
+  br i1 undef, label %return1, label %return2
+
+return1:
+; CHECK:        %return1
+; CHECK-NOT:    vmov
+; CHECK-NEXT:   vadd.i32
+; CHECK-NEXT:   vadd.i32
+; CHECK-NEXT:   vst2.32
+  %tmp52 = extractvalue %struct.__neon_int32x4x2_t %tmp2, 0 ; <<4 x i32>> [#uses=1]
+  %tmp57 = extractvalue %struct.__neon_int32x4x2_t %tmp2, 1 ; <<4 x i32>> [#uses=1]
+  %tmp = extractvalue %struct.__neon_int32x4x2_t %tmp5, 0 ; <<4 x i32>> [#uses=1]
+  %tmp39 = extractvalue %struct.__neon_int32x4x2_t %tmp5, 1 ; <<4 x i32>> [#uses=1]
+  %tmp6 = add <4 x i32> %tmp52, %tmp              ; <<4 x i32>> [#uses=1]
+  %tmp7 = add <4 x i32> %tmp57, %tmp39            ; <<4 x i32>> [#uses=1]
+  tail call void @llvm.arm.neon.vst2.v4i32(i8* %tmp8, <4 x i32> %tmp6, <4 x i32> %tmp7)
+  ret void
+
+return2:
+; CHECK:        %return2
+; CHECK:        vadd.i32
+; CHECK:        vmov q1, q3
+; CHECK-NOT:    vmov
+; CHECK:        vst2.32 {d0, d1, d2, d3}
+  %tmp100 = extractvalue %struct.__neon_int32x4x2_t %tmp2, 0 ; <<4 x i32>> [#uses=1]
+  %tmp101 = extractvalue %struct.__neon_int32x4x2_t %tmp5, 1 ; <<4 x i32>> [#uses=1]
+  %tmp102 = add <4 x i32> %tmp100, %tmp101              ; <<4 x i32>> [#uses=1]
+  tail call void @llvm.arm.neon.vst2.v4i32(i8* %tmp8, <4 x i32> %tmp102, <4 x i32> %tmp101)
+  call void @llvm.trap()
+  unreachable
+}
+
+define <8 x i16> @t5(i16* %A, <8 x i16>* %B) nounwind {
+; CHECK:        t5:
+; CHECK:        vldmia
+; CHECK:        vmov q1, q0
+; CHECK-NOT:    vmov
+; CHECK:        vld2.16 {d0[1], d2[1]}, [r0]
+; CHECK-NOT:    vmov
+; CHECK:        vadd.i16
+  %tmp0 = bitcast i16* %A to i8*                  ; <i8*> [#uses=1]
+  %tmp1 = load <8 x i16>* %B                      ; <<8 x i16>> [#uses=2]
+  %tmp2 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1) ; <%struct.__neon_int16x8x2_t> [#uses=2]
+  %tmp3 = extractvalue %struct.__neon_int16x8x2_t %tmp2, 0 ; <<8 x i16>> [#uses=1]
+  %tmp4 = extractvalue %struct.__neon_int16x8x2_t %tmp2, 1 ; <<8 x i16>> [#uses=1]
+  %tmp5 = add <8 x i16> %tmp3, %tmp4              ; <<8 x i16>> [#uses=1]
+  ret <8 x i16> %tmp5
+}
+
+define <8 x i8> @t6(i8* %A, <8 x i8>* %B) nounwind {
+; CHECK:        t6:
+; CHECK:        vldr.64
+; CHECK:        vmov d1, d0
+; CHECK-NEXT:   vld2.8 {d0[1], d1[1]}
+  %tmp1 = load <8 x i8>* %B                       ; <<8 x i8>> [#uses=2]
+  %tmp2 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1) ; <%struct.__neon_int8x8x2_t> [#uses=2]
+  %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 0 ; <<8 x i8>> [#uses=1]
+  %tmp4 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 1 ; <<8 x i8>> [#uses=1]
+  %tmp5 = add <8 x i8> %tmp3, %tmp4               ; <<8 x i8>> [#uses=1]
+  ret <8 x i8> %tmp5
+}
+
+define arm_apcscc void @t7(i32* %iptr, i32* %optr) nounwind {
+entry:
+; CHECK:        t7:
+; CHECK:        vld2.32
+; CHECK:        vst2.32
+; CHECK:        vld1.32 {d0, d1},
+; CHECK:        vmov q1, q0
+; CHECK-NOT:    vmov
+; CHECK:        vuzp.32 q0, q1
+; CHECK:        vst1.32
+  %0 = bitcast i32* %iptr to i8*                  ; <i8*> [#uses=2]
+  %1 = tail call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8* %0) ; <%struct.__neon_int32x4x2_t> [#uses=2]
+  %tmp57 = extractvalue %struct.__neon_int32x4x2_t %1, 0 ; <<4 x i32>> [#uses=1]
+  %tmp60 = extractvalue %struct.__neon_int32x4x2_t %1, 1 ; <<4 x i32>> [#uses=1]
+  %2 = bitcast i32* %optr to i8*                  ; <i8*> [#uses=2]
+  tail call void @llvm.arm.neon.vst2.v4i32(i8* %2, <4 x i32> %tmp57, <4 x i32> %tmp60)
+  %3 = tail call <4 x i32> @llvm.arm.neon.vld1.v4i32(i8* %0) ; <<4 x i32>> [#uses=1]
+  %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 0, i32 2> ; <<4 x i32>> [#uses=1]
+  tail call void @llvm.arm.neon.vst1.v4i32(i8* %2, <4 x i32> %4)
+  ret void
+}
+
+; PR7156
+define arm_aapcs_vfpcc i32 @t8() nounwind {
+; CHECK: t8:
+; CHECK: vrsqrte.f32 q0, q0
+bb.nph55.bb.nph55.split_crit_edge:
+  br label %bb3
+
+bb3:                                              ; preds = %bb3, %bb.nph55.bb.nph55.split_crit_edge
+  br i1 undef, label %bb5, label %bb3
+
+bb5:                                              ; preds = %bb3
+  br label %bb.i25
+
+bb.i25:                                           ; preds = %bb.i25, %bb5
+  %0 = shufflevector <2 x float> undef, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1]
+  %1 = call <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float> %0) nounwind ; <<4 x float>> [#uses=1]
+  %2 = fmul <4 x float> %1, undef                 ; <<4 x float>> [#uses=1]
+  %3 = fmul <4 x float> undef, %2                 ; <<4 x float>> [#uses=1]
+  %tmp26.i = bitcast <4 x float> %3 to <2 x double> ; <<2 x double>> [#uses=1]
+  %4 = extractelement <2 x double> %tmp26.i, i32 0 ; <double> [#uses=1]
+  %5 = bitcast double %4 to <2 x float>           ; <<2 x float>> [#uses=1]
+  %6 = extractelement <2 x float> %5, i32 1       ; <float> [#uses=1]
+  store float %6, float* undef, align 4
+  br i1 undef, label %bb6, label %bb.i25
+
+bb6:                                              ; preds = %bb.i25
+  br i1 undef, label %bb7, label %bb14
+
+bb7:                                              ; preds = %bb6
+  br label %bb.i49
+
+bb.i49:                                           ; preds = %bb.i49, %bb7
+  br i1 undef, label %bb.i19, label %bb.i49
+
+bb.i19:                                           ; preds = %bb.i19, %bb.i49
+  br i1 undef, label %exit, label %bb.i19
+
+exit:          ; preds = %bb.i19
+  unreachable
+
+bb14:                                             ; preds = %bb6
+  ret i32 0
+}
+
+%0 = type { %1, %1, %1, %1 }
+%1 = type { %2 }
+%2 = type { <4 x float> }
+%3 = type { %0, %1 }
+
+; PR7157
+define arm_aapcs_vfpcc float @t9(%0* nocapture, %3* nocapture) nounwind {
+; CHECK:        t9:
+; CHECK:        vldr.64
+; CHECK:        vmov.i8 d1
+; CHECK-NEXT:   vstmia r0, {d2,d3}
+; CHECK-NEXT:   vstmia r0, {d0,d1}
+  %3 = bitcast double 0.000000e+00 to <2 x float> ; <<2 x float>> [#uses=2]
+  %4 = shufflevector <2 x float> %3, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1]
+  store <4 x float> %4, <4 x float>* undef, align 16
+  %5 = shufflevector <2 x float> %3, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1]
+  store <4 x float> %5, <4 x float>* undef, align 16
+  br label %8
+
+; <label>:6                                       ; preds = %8
+  br i1 undef, label %7, label %10
+
+; <label>:7                                       ; preds = %6
+  br label %8
+
+; <label>:8                                       ; preds = %7, %2
+  br i1 undef, label %6, label %9
+
+; <label>:9                                       ; preds = %8
+  ret float undef
+
+; <label>:10                                      ; preds = %6
+  ret float 9.990000e+02
+}
+
+; PR7162
+define arm_aapcs_vfpcc i32 @t10() nounwind {
+entry:
+; CHECK: t10:
+; CHECK: vmov.i32 q1, #0x3F000000
+; CHECK: vdup.32 q0, d0[0]
+; CHECK: vmov d0, d1
+; CHECK: vmla.f32 q0, q0, d0[0]
+  %0 = shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1]
+  %1 = insertelement <4 x float> %0, float undef, i32 1 ; <<4 x float>> [#uses=1]
+  %2 = insertelement <4 x float> %1, float undef, i32 2 ; <<4 x float>> [#uses=1]
+  %3 = insertelement <4 x float> %2, float undef, i32 3 ; <<4 x float>> [#uses=1]
+  %tmp54.i = bitcast <4 x float> %3 to <2 x double> ; <<2 x double>> [#uses=1]
+  %4 = extractelement <2 x double> %tmp54.i, i32 1 ; <double> [#uses=1]
+  %5 = bitcast double %4 to <2 x float>           ; <<2 x float>> [#uses=1]
+  %6 = shufflevector <2 x float> %5, <2 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1]
+  %7 = fmul <4 x float> undef, %6                 ; <<4 x float>> [#uses=1]
+  %8 = fadd <4 x float> %7, undef                 ; <<4 x float>> [#uses=1]
+  %9 = fadd <4 x float> %8, undef                 ; <<4 x float>> [#uses=1]
+  %10 = shufflevector <4 x float> undef, <4 x float> %9, <4 x i32> <i32 0, i32 1, i32 2, i32 7> ; <<4 x float>> [#uses=1]
+  %11 = fmul <4 x float> %10, <float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01> ; <<4 x float>> [#uses=1]
+  %12 = shufflevector <4 x float> %11, <4 x float> undef, <4 x i32> <i32 3, i32 undef, i32 undef, i32 undef> ; <<4 x float>> [#uses=1]
+  %13 = shufflevector <4 x float> %12, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1]
+  %14 = fmul <4 x float> %13, undef               ; <<4 x float>> [#uses=1]
+  %15 = fadd <4 x float> undef, %14               ; <<4 x float>> [#uses=1]
+  %16 = shufflevector <4 x float> undef, <4 x float> %15, <4 x i32> <i32 0, i32 1, i32 6, i32 3> ; <<4 x float>> [#uses=1]
+  %17 = fmul <4 x float> %16, undef               ; <<4 x float>> [#uses=1]
+  %18 = extractelement <4 x float> %17, i32 2     ; <float> [#uses=1]
+  store float %18, float* undef, align 4
+  br i1 undef, label %exit, label %bb14
+
+exit:          ; preds = %bb.i19
+  unreachable
+
+bb14:                                             ; preds = %bb6
+  ret i32 0
+}
+
+; This test crashes the coalescer because live variables were not updated properly.
+define <8 x i8> @t11(i8* %A1, i8* %A2, i8* %A3, i8* %A4, i8* %A5, i8* %A6, i8* %A7, i8* %A8, i8* %B) nounwind {
+  %tmp1d = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A4) ; <%struct.__neon_int8x8x3_t> [#uses=1]
+  %tmp2d = extractvalue %struct.__neon_int8x8x3_t %tmp1d, 0 ; <<8 x i8>> [#uses=1]
+  %tmp1f = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A6) ; <%struct.__neon_int8x8x3_t> [#uses=1]
+  %tmp2f = extractvalue %struct.__neon_int8x8x3_t %tmp1f, 0 ; <<8 x i8>> [#uses=1]
+  %tmp2bd = add <8 x i8> zeroinitializer, %tmp2d  ; <<8 x i8>> [#uses=1]
+  %tmp2abcd = mul <8 x i8> zeroinitializer, %tmp2bd ; <<8 x i8>> [#uses=1]
+  %tmp2ef = sub <8 x i8> zeroinitializer, %tmp2f  ; <<8 x i8>> [#uses=1]
+  %tmp2efgh = mul <8 x i8> %tmp2ef, undef         ; <<8 x i8>> [#uses=2]
+  call void @llvm.arm.neon.vst3.v8i8(i8* %A2, <8 x i8> undef, <8 x i8> undef, <8 x i8> %tmp2efgh)
+  %tmp2 = sub <8 x i8> %tmp2efgh, %tmp2abcd       ; <<8 x i8>> [#uses=1]
+  %tmp7 = mul <8 x i8> undef, %tmp2               ; <<8 x i8>> [#uses=1]
+  tail call void @llvm.arm.neon.vst3.v8i8(i8* %B, <8 x i8> undef, <8 x i8> undef, <8 x i8> %tmp7)
+  ret <8 x i8> undef
+}
+
+declare <4 x i32> @llvm.arm.neon.vld1.v4i32(i8*) nounwind readonly
+
+declare <8 x i16> @llvm.arm.neon.vld1.v8i16(i8*) nounwind readonly
+
+declare <4 x i32> @llvm.arm.neon.vmovls.v4i32(<4 x i16>) nounwind readnone
+
+declare <4 x i16> @llvm.arm.neon.vshiftn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
+
+declare void @llvm.arm.neon.vst1.v4i32(i8*, <4 x i32>) nounwind
+
+declare void @llvm.arm.neon.vst1.v8i16(i8*, <8 x i16>) nounwind
+
+declare void @llvm.arm.neon.vst3.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>) nounwind
+
+declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8*) nounwind readonly
+
+declare %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8*) nounwind readonly
+
+declare %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32) nounwind readonly
+
+declare %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i8*, <8 x i16>, <8 x i16>, i32) nounwind readonly
+
+declare void @llvm.arm.neon.vst2.v4i32(i8*, <4 x i32>, <4 x i32>) nounwind
+
+declare <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float>) nounwind readnone
+
+declare void @llvm.trap() nounwind
diff --git a/test/CodeGen/ARM/spill-q.ll b/test/CodeGen/ARM/spill-q.ll
index 5ad7ecc..03de0c8 100644
--- a/test/CodeGen/ARM/spill-q.ll
+++ b/test/CodeGen/ARM/spill-q.ll
@@ -46,7 +46,8 @@ bb4:                                              ; preds = %bb193, %entry
   %20 = shufflevector <2 x float> %19, <2 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1]
   %21 = fadd <4 x float> zeroinitializer, %20     ; <<4 x float>> [#uses=2]
   %22 = fcmp ogt <4 x float> %besterror.0.2264, %21 ; <<4 x i1>> [#uses=0]
-  br i1 undef, label %bb193, label %bb186
+  %tmp = extractelement <4 x i1> %22, i32 0
+  br i1 %tmp, label %bb193, label %bb186
 
 bb186:                                            ; preds = %bb4
   br label %bb193
diff --git a/test/CodeGen/ARM/trap.ll b/test/CodeGen/ARM/trap.ll
new file mode 100644
index 0000000..763dff3
--- /dev/null
+++ b/test/CodeGen/ARM/trap.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=arm | FileCheck %s
+; rdar://7961298
+
+define arm_apcscc void @t() nounwind {
+entry:
+; CHECK: t:
+; CHECK: trap
+  call void @llvm.trap()
+  unreachable
+}
+
+declare void @llvm.trap() nounwind
diff --git a/test/CodeGen/ARM/vcgt.ll b/test/CodeGen/ARM/vcgt.ll
index 6b11ba5..194093c 100644
--- a/test/CodeGen/ARM/vcgt.ll
+++ b/test/CodeGen/ARM/vcgt.ll
@@ -158,5 +158,18 @@ define <4 x i32> @vacgtQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
 	ret <4 x i32> %tmp3
 }
 
+; rdar://7923010
+define <4 x i32> @vcgt_zext(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK: vcgt_zext:
+;CHECK: vcgt.f32 q0
+;CHECK: vmov.i32 q1, #0x1
+;CHECK: vand q0, q0, q1
+	%tmp1 = load <4 x float>* %A
+	%tmp2 = load <4 x float>* %B
+	%tmp3 = fcmp ogt <4 x float> %tmp1, %tmp2
+        %tmp4 = zext <4 x i1> %tmp3 to <4 x i32>
+	ret <4 x i32> %tmp4
+}
+
 declare <2 x i32> @llvm.arm.neon.vacgtd(<2 x float>, <2 x float>) nounwind readnone
 declare <4 x i32> @llvm.arm.neon.vacgtq(<4 x float>, <4 x float>) nounwind readnone
diff --git a/test/CodeGen/CellSPU/jumptable.ll b/test/CodeGen/CellSPU/jumptable.ll
new file mode 100644
index 0000000..d7d1ef4
--- /dev/null
+++ b/test/CodeGen/CellSPU/jumptable.ll
@@ -0,0 +1,21 @@
+;RUN: llc --march=cellspu %s -o - | FileCheck %s
+; This is to check that emitting jumptables doesn't crash llc
+define i32 @test(i32 %param) {
+entry:
+;CHECK:        ai      $4, $3, -1
+;CHECK:        clgti   $5, $4, 3
+;CHECK:        brnz    $5,.LBB0_2
+  switch i32 %param, label %bb1 [
+    i32 1, label %bb3
+    i32 2, label %bb2
+    i32 3, label %bb3
+    i32 4, label %bb1
+  ]
+
+bb1:                                            
+  ret i32 1
+bb2:      
+  ret i32 2
+bb3:     
+  ret i32 3
+}
diff --git a/test/CodeGen/CellSPU/sub_ops.ll b/test/CodeGen/CellSPU/sub_ops.ll
new file mode 100644
index 0000000..f0c40d3
--- /dev/null
+++ b/test/CodeGen/CellSPU/sub_ops.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -march=cellspu | FileCheck %s
+
+define i32 @subword( i32 %param1, i32 %param2) {
+; Check ordering of registers ret=param1-param2 -> rt=rb-ra
+; CHECK-NOT:	sf	$3, $3, $4
+; CHECK:	sf	$3, $4, $3
+	%1 = sub i32 %param1, %param2
+	ret i32 %1
+}
+
+define i16 @subhword( i16 %param1, i16 %param2) {
+; Check ordering of registers ret=param1-param2 -> rt=rb-ra
+; CHECK-NOT:	sfh	$3, $3, $4
+; CHECK:	sfh	$3, $4, $3
+	%1 = sub i16 %param1, %param2
+	ret i16 %1
+}
+
+define float @subfloat( float %param1, float %param2) {
+; Check ordering of registers ret=param1-param2 -> rt=ra-rb 
+; (yes this is reverse of i32 instruction)
+; CHECK-NOT:	fs	$3, $4, $3 
+; CHECK:	fs	$3, $3, $4
+	%1 = fsub float %param1, %param2
+	ret float %1
+}
diff --git a/test/CodeGen/Generic/2006-09-02-LocalAllocCrash.ll b/test/CodeGen/Generic/2006-09-02-LocalAllocCrash.ll
index 4b332b3..d5a4d6a 100644
--- a/test/CodeGen/Generic/2006-09-02-LocalAllocCrash.ll
+++ b/test/CodeGen/Generic/2006-09-02-LocalAllocCrash.ll
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -regalloc=local
+; RUN: llc < %s -regalloc=fast
 	
 %struct.CHESS_POSITION = type { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i32, i32, i8, i8, [64 x i8], i8, i8, i8, i8, i8 }
 @search = external global %struct.CHESS_POSITION		; <%struct.CHESS_POSITION*> [#uses=2]
diff --git a/test/CodeGen/Generic/legalize-dbg-value.ll b/test/CodeGen/Generic/legalize-dbg-value.ll
new file mode 100644
index 0000000..b71aa8a
--- /dev/null
+++ b/test/CodeGen/Generic/legalize-dbg-value.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -o /dev/null
+
+; llvm.dbg.value instructions can have types which are not legal for the
+; target. CodeGen should handle this.
+
+define i128 @__mulvti3(i128 %a, i128 %b) nounwind {
+entry:
+  tail call void @llvm.dbg.value(metadata !0, i64 0, metadata !1), !dbg !11
+  unreachable
+}
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+!0 = metadata !{i128 170141183460469231731687303715884105727} 
+!1 = metadata !{i32 524544, metadata !2, metadata !"MAX", metadata !4, i32 29, metadata !8} ; [ DW_TAG_auto_variable ]
+!2 = metadata !{i32 524299, metadata !3, i32 26, i32 0} ; [ DW_TAG_lexical_block ]
+!3 = metadata !{i32 524334, i32 0, metadata !4, metadata !"__mulvti3", metadata !"__mulvti3", metadata !"__mulvti3", metadata !4, i32 26, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i1 false} ; [ DW_TAG_subprogram ]
+!4 = metadata !{i32 524329, metadata !"mulvti3.c", metadata !"/Volumes/Sandbox/llvm/swb/Libcompiler_rt-6.roots/Libcompiler_rt-6/lib", metadata !5} ; [ DW_TAG_file_type ]
+!5 = metadata !{i32 524305, i32 0, i32 1, metadata !"mulvti3.c", metadata !"/Volumes/Sandbox/llvm/swb/Libcompiler_rt-6.roots/Libcompiler_rt-6/lib", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build 2328)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!6 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!7 = metadata !{metadata !8, metadata !8, metadata !8}
+!8 = metadata !{i32 524310, metadata !4, metadata !"ti_int", metadata !9, i32 78, i64 0, i64 0, i64 0, i32 0, metadata !10} ; [ DW_TAG_typedef ]
+!9 = metadata !{i32 524329, metadata !"int_lib.h", metadata !"/Volumes/Sandbox/llvm/swb/Libcompiler_rt-6.roots/Libcompiler_rt-6/lib", metadata !5} ; [ DW_TAG_file_type ]
+!10 = metadata !{i32 524324, metadata !4, metadata !"", metadata !4, i32 0, i64 128, i64 128, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!11 = metadata !{i32 29, i32 0, metadata !2, null}
diff --git a/test/CodeGen/PowerPC/2007-04-30-InlineAsmEarlyClobber.ll b/test/CodeGen/PowerPC/2007-04-30-InlineAsmEarlyClobber.ll
index be28a9a..9c28da8 100644
--- a/test/CodeGen/PowerPC/2007-04-30-InlineAsmEarlyClobber.ll
+++ b/test/CodeGen/PowerPC/2007-04-30-InlineAsmEarlyClobber.ll
@@ -1,9 +1,11 @@
-; RUN: llc < %s | grep {subfc r3,r5,r4}
-; RUN: llc < %s | grep {subfze r4,r6}
-; RUN: llc < %s -regalloc=local | grep {subfc r6,r5,r4}
-; RUN: llc < %s -regalloc=local | grep {subfze r3,r3}
+; RUN: llc < %s | FileCheck %s
+; RUN: llc < %s -regalloc=local | FileCheck %s
+; RUN: llc < %s -regalloc=fast | FileCheck %s
 ; The first argument of subfc must not be the same as any other register.
 
+; CHECK: subfc [[REG:r.]],
+; CHECK-NOT: [[REG]]
+; CHECK: InlineAsm End
 ; PR1357
 
 target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
diff --git a/test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert.ll b/test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert.ll
index ee61478..3cfe603 100644
--- a/test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert.ll
+++ b/test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert.ll
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -mtriple=powerpc64-apple-darwin9 -regalloc=local -relocation-model=pic
+; RUN: llc < %s -mtriple=powerpc64-apple-darwin9 -regalloc=fast -relocation-model=pic
 
 	%struct.NSError = type opaque
 	%struct.NSManagedObjectContext = type opaque
diff --git a/test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert2.ll b/test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert2.ll
index 5a07a9b..8339a0b 100644
--- a/test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert2.ll
+++ b/test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert2.ll
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -mtriple=powerpc64-apple-darwin9 -regalloc=local -relocation-model=pic
+; RUN: llc < %s -mtriple=powerpc64-apple-darwin9 -regalloc=fast -relocation-model=pic
 
 	%struct.NSError = type opaque
 	%struct.NSManagedObjectContext = type opaque
diff --git a/test/CodeGen/PowerPC/2008-02-09-LocalRegAllocAssert.ll b/test/CodeGen/PowerPC/2008-02-09-LocalRegAllocAssert.ll
index cfa1b10..45dfdc8 100644
--- a/test/CodeGen/PowerPC/2008-02-09-LocalRegAllocAssert.ll
+++ b/test/CodeGen/PowerPC/2008-02-09-LocalRegAllocAssert.ll
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -mtriple=powerpc-apple-darwin -regalloc=local
+; RUN: llc < %s -mtriple=powerpc-apple-darwin -regalloc=fast
 
 define i32 @bork(i64 %foo, i64 %bar) {
 entry:
diff --git a/test/CodeGen/PowerPC/cr_spilling.ll b/test/CodeGen/PowerPC/cr_spilling.ll
index b215868..9ed2614 100644
--- a/test/CodeGen/PowerPC/cr_spilling.ll
+++ b/test/CodeGen/PowerPC/cr_spilling.ll
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -march=ppc32 -regalloc=local -O0 -relocation-model=pic -o -
+; RUN: llc < %s -march=ppc32 -regalloc=fast -O0 -relocation-model=pic -o -
 ; PR1638
 
 @.str242 = external constant [3 x i8]		; <[3 x i8]*> [#uses=1]
diff --git a/test/CodeGen/SystemZ/2009-07-10-BadIncomingArgOffset.ll b/test/CodeGen/SystemZ/2009-07-10-BadIncomingArgOffset.ll
index 6a76a8e..b37f7e9 100644
--- a/test/CodeGen/SystemZ/2009-07-10-BadIncomingArgOffset.ll
+++ b/test/CodeGen/SystemZ/2009-07-10-BadIncomingArgOffset.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s | grep 168
+; RUN: llc < %s | FileCheck %s
 
 target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16"
 target triple = "s390x-linux"
@@ -8,6 +8,8 @@ declare void @rdft(i32 signext, i32 signext, double*, i32* nocapture, double*) n
 declare double @mp_mul_d2i_test(i32 signext, i32 signext, double* nocapture) nounwind
 
 define void @mp_mul_radix_test_bb3(i32 %radix, i32 %nfft, double* %tmpfft, i32* %ip, double* %w, double* %arrayidx44.reload, double* %call.out) nounwind {
+; CHECK: lg %r11, 328(%r15)
+
 newFuncRoot:
 	br label %bb3
 
diff --git a/test/CodeGen/Thumb/2010-01-15-local-alloc-spill-physical.ll b/test/CodeGen/Thumb/2010-01-15-local-alloc-spill-physical.ll
index d676369..300e66c 100644
--- a/test/CodeGen/Thumb/2010-01-15-local-alloc-spill-physical.ll
+++ b/test/CodeGen/Thumb/2010-01-15-local-alloc-spill-physical.ll
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -regalloc=local -relocation-model=pic | FileCheck %s
+; RUN: llc < %s -regalloc=fast -relocation-model=pic | FileCheck %s
 
 target triple = "thumbv6-apple-darwin10"
 
@@ -6,10 +7,10 @@ target triple = "thumbv6-apple-darwin10"
 
 define arm_apcscc void @foo() nounwind {
 entry:
-; CHECK: str r0, [sp]
+; CHECK: str r0, [sp
   %0 = call arm_apcscc  i32 (...)* @bar() nounwind ; <i32> [#uses=1]
 ; CHECK: blx _bar
-; CHECK: ldr r1, [sp]
+; CHECK: ldr r1, [sp
   store i32 %0, i32* @fred, align 4
   br label %return
 
diff --git a/test/CodeGen/Thumb/trap.ll b/test/CodeGen/Thumb/trap.ll
new file mode 100644
index 0000000..76a0589
--- /dev/null
+++ b/test/CodeGen/Thumb/trap.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=thumb | FileCheck %s
+; rdar://7961298
+
+define arm_apcscc void @t() nounwind {
+entry:
+; CHECK: t:
+; CHECK: trap
+  call void @llvm.trap()
+  unreachable
+}
+
+declare void @llvm.trap() nounwind
diff --git a/test/CodeGen/Thumb2/2010-05-24-rsbs.ll b/test/CodeGen/Thumb2/2010-05-24-rsbs.ll
new file mode 100644
index 0000000..7a40aa9
--- /dev/null
+++ b/test/CodeGen/Thumb2/2010-05-24-rsbs.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s
+; Radar 8017376: Missing 's' suffix for t2RSBS instructions.
+; CHECK: rsbs
+
+define arm_apcscc i64 @test(i64 %x) nounwind readnone {
+entry:
+  %0 = sub nsw i64 1, %x                          ; <i64> [#uses=1]
+  ret i64 %0
+}
diff --git a/test/CodeGen/Thumb2/div.ll b/test/CodeGen/Thumb2/div.ll
new file mode 100644
index 0000000..0cddd48
--- /dev/null
+++ b/test/CodeGen/Thumb2/div.ll
@@ -0,0 +1,45 @@
+; RUN: llc < %s -march=thumb -mattr=+thumb2 \
+; RUN:    | FileCheck %s -check-prefix=CHECK-THUMB
+; RUN: llc < %s -march=arm -mcpu=cortex-m3 -mattr=+thumb2 \
+; RUN:    | FileCheck %s -check-prefix=CHECK-THUMBV7M
+
+define i32 @f1(i32 %a, i32 %b) {
+entry:
+; CHECK-THUMB: f1
+; CHECK-THUMB: __divsi3
+; CHECK-THUMBV7M: f1
+; CHECK-THUMBV7M: sdiv
+        %tmp1 = sdiv i32 %a, %b         ; <i32> [#uses=1]
+        ret i32 %tmp1
+}
+
+define i32 @f2(i32 %a, i32 %b) {
+entry:
+; CHECK-THUMB: f2
+; CHECK-THUMB: __udivsi3
+; CHECK-THUMBV7M: f2
+; CHECK-THUMBV7M: udiv
+        %tmp1 = udiv i32 %a, %b         ; <i32> [#uses=1]
+        ret i32 %tmp1
+}
+
+define i32 @f3(i32 %a, i32 %b) {
+entry:
+; CHECK-THUMB: f3
+; CHECK-THUMB: __modsi3
+; CHECK-THUMBV7M: f3
+; CHECK-THUMBV7M: sdiv
+        %tmp1 = srem i32 %a, %b         ; <i32> [#uses=1]
+        ret i32 %tmp1
+}
+
+define i32 @f4(i32 %a, i32 %b) {
+entry:
+; CHECK-THUMB: f4
+; CHECK-THUMB: __umodsi3
+; CHECK-THUMBV7M: f4
+; CHECK-THUMBV7M: udiv
+        %tmp1 = urem i32 %a, %b         ; <i32> [#uses=1]
+        ret i32 %tmp1
+}
+
diff --git a/test/CodeGen/Thumb2/machine-licm.ll b/test/CodeGen/Thumb2/machine-licm.ll
index c298aa2..98acc28 100644
--- a/test/CodeGen/Thumb2/machine-licm.ll
+++ b/test/CodeGen/Thumb2/machine-licm.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -disable-fp-elim                       | FileCheck %s
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -relocation-model=pic -disable-fp-elim | FileCheck %s --check-prefix=PIC
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -disable-fp-elim                       | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -relocation-model=pic -disable-fp-elim | FileCheck %s --check-prefix=PIC
 ; rdar://7353541
 ; rdar://7354376
 
@@ -8,9 +8,9 @@
 
 @GV = external global i32                         ; <i32*> [#uses=2]
 
-define arm_apcscc void @t(i32* nocapture %vals, i32 %c) nounwind {
+define arm_apcscc void @t1(i32* nocapture %vals, i32 %c) nounwind {
 entry:
-; CHECK: t:
+; CHECK: t1:
 ; CHECK: cbz
   %0 = icmp eq i32 %c, 0                          ; <i1> [#uses=1]
   br i1 %0, label %return, label %bb.nph
@@ -22,8 +22,7 @@ bb.nph:                                           ; preds = %entry
 ; CHECK: ldr r3, [r2]
 ; CHECK: LBB0_2
 ; CHECK: LCPI0_0:
-; CHECK-NOT: LCPI1_1:
-; CHECK: .section
+; CHECK-NOT: LCPI0_1:
 
 ; PIC: BB#1
 ; PIC: ldr.n r2, LCPI0_0
@@ -51,3 +50,37 @@ bb:                                               ; preds = %bb, %bb.nph
 return:                                           ; preds = %bb, %entry
   ret void
 }
+
+; rdar://8001136
+define arm_apcscc void @t2(i8* %ptr1, i8* %ptr2) nounwind {
+entry:
+; CHECK: t2:
+; CHECK: adr r{{.}}, #LCPI1_0
+; CHECK: vldmia r3, {d0,d1}
+  br i1 undef, label %bb1, label %bb2
+
+bb1:
+; CHECK-NEXT: %bb1
+  %indvar = phi i32 [ %indvar.next, %bb1 ], [ 0, %entry ]
+  %tmp1 = shl i32 %indvar, 2
+  %gep1 = getelementptr i8* %ptr1, i32 %tmp1
+  %tmp2 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %gep1)
+  %tmp3 = call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float> %tmp2)
+  %gep2 = getelementptr i8* %ptr2, i32 %tmp1
+  call void @llvm.arm.neon.vst1.v4f32(i8* %gep2, <4 x float> %tmp3)
+  %indvar.next = add i32 %indvar, 1
+  %cond = icmp eq i32 %indvar.next, 10
+  br i1 %cond, label %bb2, label %bb1
+
+bb2:
+  ret void
+}
+
+; CHECK: LCPI1_0:
+; CHECK: .section
+
+declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*) nounwind readonly
+
+declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>) nounwind
+
+declare <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float>, <4 x float>) nounwind readnone
diff --git a/test/CodeGen/Thumb2/sign_extend_inreg.ll b/test/CodeGen/Thumb2/sign_extend_inreg.ll
new file mode 100644
index 0000000..9a02c1c
--- /dev/null
+++ b/test/CodeGen/Thumb2/sign_extend_inreg.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -mcpu=cortex-a8 | FileCheck %s -check-prefix=CHECK-A8
+; RUN: llc < %s -mcpu=cortex-m3 | FileCheck %s -check-prefix=CHECK-M3
+
+target triple = "thumbv7-apple-darwin10"
+
+define arm_apcscc i32 @f1(i16* %ptr) nounwind {
+; CHECK-A8: f1
+; CHECK-A8: sxth
+; CHECK-M3: f1
+; CHECK-M3-NOT: sxth
+; CHECK-M3: bx lr
+  %1 = load i16* %ptr
+  %2 = icmp eq i16 %1, 1
+  %3 = sext i16 %1 to i32
+  br i1 %2, label %.next, label %.exit
+
+.next:
+  br label %.exit
+
+.exit:
+  ret i32 %3
+}
diff --git a/test/CodeGen/Thumb2/thumb2-pack.ll b/test/CodeGen/Thumb2/thumb2-pack.ll
index a982249..c8302df 100644
--- a/test/CodeGen/Thumb2/thumb2-pack.ll
+++ b/test/CodeGen/Thumb2/thumb2-pack.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | \
+; RUN: llc < %s -march=thumb -mattr=+thumb2,+t2xtpk | \
 ; RUN:   grep pkhbt | count 5
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | \
+; RUN: llc < %s -march=thumb -mattr=+thumb2,+t2xtpk | \
 ; RUN:   grep pkhtb | count 4
 
 define i32 @test1(i32 %X, i32 %Y) {
diff --git a/test/CodeGen/Thumb2/thumb2-rev.ll b/test/CodeGen/Thumb2/thumb2-rev.ll
index 27b1672..2cee2e3 100644
--- a/test/CodeGen/Thumb2/thumb2-rev.ll
+++ b/test/CodeGen/Thumb2/thumb2-rev.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2,+v7a | FileCheck %s
+; RUN: llc < %s -march=thumb -mattr=+thumb2,+v7a,+t2xtpk | FileCheck %s
 
 define i32 @f1(i32 %a) {
 ; CHECK: f1:
diff --git a/test/CodeGen/Thumb2/thumb2-shifter.ll b/test/CodeGen/Thumb2/thumb2-shifter.ll
index b106ced..98854a1 100644
--- a/test/CodeGen/Thumb2/thumb2-shifter.ll
+++ b/test/CodeGen/Thumb2/thumb2-shifter.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -march=thumb -mattr=+thumb2,+t2xtpk | FileCheck %s
 
 define i32 @t2ADDrs_lsl(i32 %X, i32 %Y) {
 ; CHECK: t2ADDrs_lsl
diff --git a/test/CodeGen/Thumb2/thumb2-smla.ll b/test/CodeGen/Thumb2/thumb2-smla.ll
index 092ec27..bd4dcbe 100644
--- a/test/CodeGen/Thumb2/thumb2-smla.ll
+++ b/test/CodeGen/Thumb2/thumb2-smla.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -march=thumb -mattr=+thumb2,+t2xtpk | FileCheck %s
 
 define i32 @f3(i32 %a, i16 %x, i32 %y) {
 ; CHECK: f3
diff --git a/test/CodeGen/Thumb2/thumb2-smul.ll b/test/CodeGen/Thumb2/thumb2-smul.ll
index 16ea85d..ae17535 100644
--- a/test/CodeGen/Thumb2/thumb2-smul.ll
+++ b/test/CodeGen/Thumb2/thumb2-smul.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 |  FileCheck %s
+; RUN: llc < %s -march=thumb -mattr=+thumb2,+t2xtpk |  FileCheck %s
 
 @x = weak global i16 0          ; <i16*> [#uses=1]
 @y = weak global i16 0          ; <i16*> [#uses=0]
diff --git a/test/CodeGen/Thumb2/thumb2-spill-q.ll b/test/CodeGen/Thumb2/thumb2-spill-q.ll
index ff178b4..bf9c052 100644
--- a/test/CodeGen/Thumb2/thumb2-spill-q.ll
+++ b/test/CodeGen/Thumb2/thumb2-spill-q.ll
@@ -46,7 +46,8 @@ bb4:                                              ; preds = %bb193, %entry
   %20 = shufflevector <2 x float> %19, <2 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1]
   %21 = fadd <4 x float> zeroinitializer, %20     ; <<4 x float>> [#uses=2]
   %22 = fcmp ogt <4 x float> %besterror.0.2264, %21 ; <<4 x i1>> [#uses=0]
-  br i1 undef, label %bb193, label %bb186
+  %tmp = extractelement <4 x i1> %22, i32 0
+  br i1 %tmp, label %bb193, label %bb186
 
 bb186:                                            ; preds = %bb4
   br label %bb193
diff --git a/test/CodeGen/Thumb2/thumb2-sxt_rot.ll b/test/CodeGen/Thumb2/thumb2-sxt_rot.ll
index 054d5df..4b685a8 100644
--- a/test/CodeGen/Thumb2/thumb2-sxt_rot.ll
+++ b/test/CodeGen/Thumb2/thumb2-sxt_rot.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -march=thumb -mattr=+thumb2,+t2xtpk | FileCheck %s
 
 define i32 @test0(i8 %A) {
 ; CHECK: test0
diff --git a/test/CodeGen/Thumb2/thumb2-uxt_rot.ll b/test/CodeGen/Thumb2/thumb2-uxt_rot.ll
index 75e1d70..b8e4381 100644
--- a/test/CodeGen/Thumb2/thumb2-uxt_rot.ll
+++ b/test/CodeGen/Thumb2/thumb2-uxt_rot.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -march=thumb -mattr=+thumb2,+t2xtpk | FileCheck %s
 
 define i8 @test1(i32 %A.u) zeroext {
 ; CHECK: test1
diff --git a/test/CodeGen/Thumb2/thumb2-uxtb.ll b/test/CodeGen/Thumb2/thumb2-uxtb.ll
index 91598cd..5411914 100644
--- a/test/CodeGen/Thumb2/thumb2-uxtb.ll
+++ b/test/CodeGen/Thumb2/thumb2-uxtb.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -march=thumb -mattr=+thumb2,+t2xtpk | FileCheck %s
 
 define i32 @test1(i32 %x) {
 ; CHECK: test1
diff --git a/test/CodeGen/X86/2008-01-16-FPStackifierAssert.ll b/test/CodeGen/X86/2008-01-16-FPStackifierAssert.ll
index d795610..8aabb52 100644
--- a/test/CodeGen/X86/2008-01-16-FPStackifierAssert.ll
+++ b/test/CodeGen/X86/2008-01-16-FPStackifierAssert.ll
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -march=x86 -mattr=+sse2 -regalloc=local
+; RUN: llc < %s -march=x86 -mattr=+sse2 -regalloc=fast
 
 define void @SolveCubic(double %a, double %b, double %c, double %d, i32* %solutions, double* %x) {
 entry:
diff --git a/test/CodeGen/X86/2008-02-22-LocalRegAllocBug.ll b/test/CodeGen/X86/2008-02-22-LocalRegAllocBug.ll
index 6b1eefe..d294885 100644
--- a/test/CodeGen/X86/2008-02-22-LocalRegAllocBug.ll
+++ b/test/CodeGen/X86/2008-02-22-LocalRegAllocBug.ll
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -regalloc=local -march=x86 -mattr=+mmx | grep esi
+; RUN: llc < %s -regalloc=fast -march=x86 -mattr=+mmx | grep esi
 ; PR2082
 ; Local register allocator was refusing to use ESI, EDI, and EBP so it ran out of
 ; registers.
diff --git a/test/CodeGen/X86/2008-04-15-LiveVariableBug.ll b/test/CodeGen/X86/2008-04-15-LiveVariableBug.ll
index 2aea9c5..716563b 100644
--- a/test/CodeGen/X86/2008-04-15-LiveVariableBug.ll
+++ b/test/CodeGen/X86/2008-04-15-LiveVariableBug.ll
@@ -1,5 +1,6 @@
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -relocation-model=pic -disable-fp-elim -O0 -regalloc=local
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -relocation-model=pic -disable-fp-elim -O0 -regalloc=fast
 ; PR5534
 
 	%struct.CGPoint = type { double, double }
diff --git a/test/CodeGen/X86/2008-05-28-LocalRegAllocBug.ll b/test/CodeGen/X86/2008-05-28-LocalRegAllocBug.ll
index f1a19ec..5929aff 100644
--- a/test/CodeGen/X86/2008-05-28-LocalRegAllocBug.ll
+++ b/test/CodeGen/X86/2008-05-28-LocalRegAllocBug.ll
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -mtriple=i386-apple-darwin -regalloc=local
+; RUN: llc < %s -mtriple=i386-apple-darwin -regalloc=fast
 
 @_ZTVN10Evaluation10GridOutputILi3EEE = external constant [5 x i32 (...)*]		; <[5 x i32 (...)*]*> [#uses=1]
 
diff --git a/test/CodeGen/X86/2008-09-17-inline-asm-1.ll b/test/CodeGen/X86/2008-09-17-inline-asm-1.ll
index 74429c3..dd83336 100644
--- a/test/CodeGen/X86/2008-09-17-inline-asm-1.ll
+++ b/test/CodeGen/X86/2008-09-17-inline-asm-1.ll
@@ -1,18 +1,19 @@
-; RUN: llc < %s -march=x86 | not grep "movl %eax, %eax"
-; RUN: llc < %s -march=x86 | not grep "movl %edx, %edx"
-; RUN: llc < %s -march=x86 | not grep "movl (%eax), %eax"
-; RUN: llc < %s -march=x86 | not grep "movl (%edx), %edx"
-; RUN: llc < %s -march=x86 -regalloc=local | not grep "movl %eax, %eax"
-; RUN: llc < %s -march=x86 -regalloc=local | not grep "movl %edx, %edx"
-; RUN: llc < %s -march=x86 -regalloc=local | not grep "movl (%eax), %eax"
-; RUN: llc < %s -march=x86 -regalloc=local | not grep "movl (%edx), %edx"
+; RUN: llc < %s -march=x86 | FileCheck %s
+; RUN: llc < %s -march=x86 -regalloc=local | FileCheck %s
+; RUN: llc < %s -march=x86 -regalloc=fast | FileCheck %s
 
 ; %0 must not be put in EAX or EDX.
 ; In the first asm, $0 and $2 must not be put in EAX.
+; CHECK: InlineAsm Start
+; CHECK-NOT: movl %eax, %eax
+; CHECK-NOT: movl (%eax), %eax
+; CHECK: InlineAsm End
 ; In the second asm, $0 and $2 must not be put in EDX.
-; This is kind of hard to test thoroughly, but the things above should continue
-; to pass, I think.
-; ModuleID = '<stdin>'
+; CHECK: InlineAsm Start
+; CHECK-NOT: movl %edx, %edx
+; CHECK-NOT: movl (%edx), %edx
+; CHECK: InlineAsm End
+
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i386-apple-darwin8"
 @x = common global i32 0		; <i32*> [#uses=1]
diff --git a/test/CodeGen/X86/2008-09-18-inline-asm-2.ll b/test/CodeGen/X86/2008-09-18-inline-asm-2.ll
index e3b6fdf..4400940 100644
--- a/test/CodeGen/X86/2008-09-18-inline-asm-2.ll
+++ b/test/CodeGen/X86/2008-09-18-inline-asm-2.ll
@@ -1,5 +1,7 @@
 ; RUN: llc < %s -march=x86 | grep "#%ebp %esi %edi 8(%edx) %eax (%ebx)"
 ; RUN: llc < %s -march=x86 -regalloc=local | grep "#%edi %ebp %edx 8(%ebx) %eax (%esi)"
+; RUN: llc < %s -march=x86 -regalloc=fast  | grep "#%edi %ebp %edx 8(%ebx) %eax (%esi)"
+
 ; The 1st, 2nd, 3rd and 5th registers above must all be different.  The registers
 ; referenced in the 4th and 6th operands must not be the same as the 1st or 5th
 ; operand.  There are many combinations that work; this is what llc puts out now.
diff --git a/test/CodeGen/X86/2009-01-29-LocalRegAllocBug.ll b/test/CodeGen/X86/2009-01-29-LocalRegAllocBug.ll
index ce3ea82..21b43fb 100644
--- a/test/CodeGen/X86/2009-01-29-LocalRegAllocBug.ll
+++ b/test/CodeGen/X86/2009-01-29-LocalRegAllocBug.ll
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -mtriple=i386-apple-darwin9.6 -regalloc=local -disable-fp-elim
+; RUN: llc < %s -mtriple=i386-apple-darwin9.6 -regalloc=fast -disable-fp-elim
 ; rdar://6538384
 
 	%struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
diff --git a/test/CodeGen/X86/2009-04-14-IllegalRegs.ll b/test/CodeGen/X86/2009-04-14-IllegalRegs.ll
index bfa3eaa..e5d46f9 100644
--- a/test/CodeGen/X86/2009-04-14-IllegalRegs.ll
+++ b/test/CodeGen/X86/2009-04-14-IllegalRegs.ll
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -mtriple=i386-apple-darwin -O0 -regalloc=local | not grep sil
+; RUN: llc < %s -mtriple=i386-apple-darwin -O0 -regalloc=fast | not grep sil
 ; rdar://6787136
 
 	%struct.X = type { i8, [32 x i8] }
diff --git a/test/CodeGen/X86/2009-06-03-Win64DisableRedZone.ll b/test/CodeGen/X86/2009-06-03-Win64DisableRedZone.ll
index e6f3008..c598228 100644
--- a/test/CodeGen/X86/2009-06-03-Win64DisableRedZone.ll
+++ b/test/CodeGen/X86/2009-06-03-Win64DisableRedZone.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s | grep "subq.*\\\$40, \\\%rsp"
-target triple = "x86_64-mingw64"
+target triple = "x86_64-pc-mingw64"
 
 define x86_fp80 @a(i64 %x) nounwind readnone {
 entry:
diff --git a/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll b/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll
index cb64bf2..810a6f4 100644
--- a/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll
+++ b/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll
@@ -2,7 +2,7 @@
 ; RUN: grep "subq.*\\\$72, \\\%rsp" %t1
 ; RUN: grep "movaps	\\\%xmm8, 32\\\(\\\%rsp\\\)" %t1
 ; RUN: grep "movaps	\\\%xmm7, 48\\\(\\\%rsp\\\)" %t1
-target triple = "x86_64-mingw64"
+target triple = "x86_64-pc-mingw64"
 
 define i32 @a() nounwind {
 entry:
diff --git a/test/CodeGen/X86/2009-08-08-CastError.ll b/test/CodeGen/X86/2009-08-08-CastError.ll
index 9456d91..2dc812d 100644
--- a/test/CodeGen/X86/2009-08-08-CastError.ll
+++ b/test/CodeGen/X86/2009-08-08-CastError.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-mingw64 | grep movabsq
+; RUN: llc < %s -mtriple=x86_64-pc-mingw64 | grep movabsq
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 
diff --git a/test/CodeGen/X86/2010-04-30-LocalAlloc-LandingPad.ll b/test/CodeGen/X86/2010-04-30-LocalAlloc-LandingPad.ll
index f5048af..4c95179 100644
--- a/test/CodeGen/X86/2010-04-30-LocalAlloc-LandingPad.ll
+++ b/test/CodeGen/X86/2010-04-30-LocalAlloc-LandingPad.ll
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -O0 -regalloc=local -relocation-model=pic -disable-fp-elim | FileCheck %s
+; RUN: llc < %s -O0 -regalloc=fast -relocation-model=pic -disable-fp-elim | FileCheck %s
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
 target triple = "i386-apple-darwin10.0.0"
 
diff --git a/test/CodeGen/X86/2010-05-05-LocalAllocEarlyClobber.ll b/test/CodeGen/X86/2010-05-05-LocalAllocEarlyClobber.ll
new file mode 100644
index 0000000..375f424
--- /dev/null
+++ b/test/CodeGen/X86/2010-05-05-LocalAllocEarlyClobber.ll
@@ -0,0 +1,33 @@
+; RUN-XFAIL: llc < %s -O0 -regalloc=local | FileCheck %s
+; RUN: llc < %s -O0 -regalloc=fast | FileCheck %s
+; PR6520
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
+target triple = "i386-apple-darwin10.0.0"
+
+%0 = type { i8*, i8*, i32 }
+
+define i8* @func() nounwind ssp {
+entry:
+  %retval = alloca i8*, align 4                   ; <i8**> [#uses=2]
+  %ret = alloca i8*, align 4                      ; <i8**> [#uses=2]
+  %p = alloca i8*, align 4                        ; <i8**> [#uses=1]
+  %t = alloca i32, align 4                        ; <i32*> [#uses=1]
+; The earlyclobber $1 should only appear once. It should not be shared.
+; CHECK: deafbeef, [[REG:%e.x]]
+; CHECK-NOT: [[REG]]
+; CHECK: InlineAsm End
+  %0 = call %0 asm "mov    $$0xdeafbeef, $1\0A\09mov    $$0xcafebabe, $0\0A\09mov    $0, $2\0A\09", "=&r,=&r,=&{cx},~{dirflag},~{fpsr},~{flags}"() nounwind, !srcloc !0 ; <%0> [#uses=3]
+  %asmresult = extractvalue %0 %0, 0              ; <i8*> [#uses=1]
+  %asmresult1 = extractvalue %0 %0, 1             ; <i8*> [#uses=1]
+  %asmresult2 = extractvalue %0 %0, 2             ; <i32> [#uses=1]
+  store i8* %asmresult, i8** %ret
+  store i8* %asmresult1, i8** %p
+  store i32 %asmresult2, i32* %t
+  %tmp = load i8** %ret                           ; <i8*> [#uses=1]
+  store i8* %tmp, i8** %retval
+  %1 = load i8** %retval                          ; <i8*> [#uses=1]
+  ret i8* %1
+}
+
+!0 = metadata !{i32 79}
diff --git a/test/CodeGen/X86/2010-05-06-LocalInlineAsmClobber.ll b/test/CodeGen/X86/2010-05-06-LocalInlineAsmClobber.ll
new file mode 100644
index 0000000..e554f9f
--- /dev/null
+++ b/test/CodeGen/X86/2010-05-06-LocalInlineAsmClobber.ll
@@ -0,0 +1,11 @@
+; RUN: llc -regalloc=local %s -o %t
+; RUN: llc -regalloc=fast %s -o %t
+; PR7066
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @sys_clone(i32 (i8*)* %fn, i8* %child_stack, i32 %flags, i8* %arg, i32* %parent_tidptr, i8* %newtls, i32* %child_tidptr) nounwind {
+  call i64 asm sideeffect "", "={ax},0,i,i,r,{si},{di},r,{dx},imr,imr,~{sp},~{memory},~{r8},~{r10},~{r11},~{cx},~{dirflag},~{fpsr},~{flags}"(i64 4294967274, i32 56, i32 60, i32 (i8*)* undef, i8* undef, i32 undef, i8* undef, i32* undef, i8* undef, i32* undef) nounwind ; <i64> [#uses=0]
+  ret i32 undef
+}
diff --git a/test/CodeGen/X86/2010-05-07-ldconvert.ll b/test/CodeGen/X86/2010-05-07-ldconvert.ll
new file mode 100644
index 0000000..0ba6a8f
--- /dev/null
+++ b/test/CodeGen/X86/2010-05-07-ldconvert.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin11
+; PR 7087 - used to crash
+
+define i32 @main() ssp {
+entry:
+  %retval = alloca i32, align 4                   ; <i32*> [#uses=2]
+  %r = alloca i32, align 4                        ; <i32*> [#uses=2]
+  store i32 0, i32* %retval
+  %tmp = call x86_fp80 @llvm.powi.f80(x86_fp80 0xK3FFF8000000000000000, i32 -64) ; <x86_fp80> [#uses=1]
+  %conv = fptosi x86_fp80 %tmp to i32             ; <i32> [#uses=1]
+  store i32 %conv, i32* %r
+  %tmp1 = load i32* %r                            ; <i32> [#uses=1]
+  %tobool = icmp ne i32 %tmp1, 0                  ; <i1> [#uses=1]
+  br i1 %tobool, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  call void @_Z1fv()
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  %0 = load i32* %retval                          ; <i32> [#uses=1]
+  ret i32 %0
+}
+
+declare x86_fp80 @llvm.powi.f80(x86_fp80, i32) nounwind readonly
+
+declare void @_Z1fv()
diff --git a/test/CodeGen/X86/2010-05-10-DAGCombinerBug.ll b/test/CodeGen/X86/2010-05-10-DAGCombinerBug.ll
new file mode 100644
index 0000000..e719da3
--- /dev/null
+++ b/test/CodeGen/X86/2010-05-10-DAGCombinerBug.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin10
+; PR7018
+; rdar://7939869
+
+define i32 @CXB30130(i32 %num1, i16* nocapture %num2, float* nocapture %num3, double* nocapture %num4) nounwind ssp {
+entry:
+  %0 = load i16* %num2, align 2                   ; <i16> [#uses=2]
+  %1 = mul nsw i16 %0, %0                         ; <i16> [#uses=1]
+  store i16 %1, i16* %num2, align 2
+  ret i32 undef
+}
diff --git a/test/CodeGen/X86/2010-05-12-FastAllocKills.ll b/test/CodeGen/X86/2010-05-12-FastAllocKills.ll
new file mode 100644
index 0000000..36a99d6
--- /dev/null
+++ b/test/CodeGen/X86/2010-05-12-FastAllocKills.ll
@@ -0,0 +1,59 @@
+; RUN: llc -regalloc=fast -verify-machineinstrs < %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin"
+
+; This test causes a virtual FP register to be redefined while it is live:
+;BB#5: derived from LLVM BB %bb10
+;    Predecessors according to CFG: BB#4 BB#5
+;	%reg1024<def> = MOV_Fp8080 %reg1034
+;	%reg1025<def> = MUL_Fp80m32 %reg1024, %RIP, 1, %reg0, <cp#0>, %reg0; mem:LD4[ConstantPool]
+;	%reg1034<def> = MOV_Fp8080 %reg1025
+;	FP_REG_KILL %FP0<imp-def>, %FP1<imp-def>, %FP2<imp-def>, %FP3<imp-def>, %FP4<imp-def>, %FP5<imp-def>, %FP6<imp-def>
+;	JMP_4 <BB#5>
+;    Successors according to CFG: BB#5
+;
+; The X86FP pass needs good kill flags, like on %FP0 representing %reg1034:
+;BB#5: derived from LLVM BB %bb10
+;    Predecessors according to CFG: BB#4 BB#5
+;	%FP0<def> = LD_Fp80m <fi#3>, 1, %reg0, 0, %reg0; mem:LD10[FixedStack3](align=4)
+;	%FP1<def> = MOV_Fp8080 %FP0<kill>
+;	%FP2<def> = MUL_Fp80m32 %FP1, %RIP, 1, %reg0, <cp#0>, %reg0; mem:LD4[ConstantPool]
+;	%FP0<def> = MOV_Fp8080 %FP2
+;	ST_FpP80m <fi#3>, 1, %reg0, 0, %reg0, %FP0<kill>; mem:ST10[FixedStack3](align=4)
+;	ST_FpP80m <fi#4>, 1, %reg0, 0, %reg0, %FP1<kill>; mem:ST10[FixedStack4](align=4)
+;	ST_FpP80m <fi#5>, 1, %reg0, 0, %reg0, %FP2<kill>; mem:ST10[FixedStack5](align=4)
+;	FP_REG_KILL %FP0<imp-def>, %FP1<imp-def>, %FP2<imp-def>, %FP3<imp-def>, %FP4<imp-def>, %FP5<imp-def>, %FP6<imp-def>
+;	JMP_4 <BB#5>
+;    Successors according to CFG: BB#5
+
+define fastcc i32 @sqlite3AtoF(i8* %z, double* nocapture %pResult) nounwind ssp {
+entry:
+  br i1 undef, label %bb2, label %bb1.i.i
+
+bb1.i.i:                                          ; preds = %entry
+  unreachable
+
+bb2:                                              ; preds = %entry
+  br i1 undef, label %isdigit339.exit11.preheader, label %bb13
+
+isdigit339.exit11.preheader:                      ; preds = %bb2
+  br i1 undef, label %bb12, label %bb10
+
+bb10:                                             ; preds = %bb10, %isdigit339.exit11.preheader
+  %divisor.041 = phi x86_fp80 [ %0, %bb10 ], [ 0xK3FFF8000000000000000, %isdigit339.exit11.preheader ] ; <x86_fp80> [#uses=1]
+  %0 = fmul x86_fp80 %divisor.041, 0xK4002A000000000000000 ; <x86_fp80> [#uses=2]
+  br i1 false, label %bb12, label %bb10
+
+bb12:                                             ; preds = %bb10, %isdigit339.exit11.preheader
+  %divisor.0.lcssa = phi x86_fp80 [ 0xK3FFF8000000000000000, %isdigit339.exit11.preheader ], [ %0, %bb10 ] ; <x86_fp80> [#uses=0]
+  br label %bb13
+
+bb13:                                             ; preds = %bb12, %bb2
+  br i1 undef, label %bb34, label %bb36
+
+bb34:                                             ; preds = %bb13
+  br label %bb36
+
+bb36:                                             ; preds = %bb34, %bb13
+  ret i32 undef
+}
diff --git a/test/CodeGen/X86/2010-05-16-nosseconversion.ll b/test/CodeGen/X86/2010-05-16-nosseconversion.ll
new file mode 100644
index 0000000..889575c
--- /dev/null
+++ b/test/CodeGen/X86/2010-05-16-nosseconversion.ll
@@ -0,0 +1,12 @@
+; RUN: llc -mtriple=x86_64-apple-darwin -mattr=-sse < %s
+; PR 7135
+
+@x = common global i64 0                          ; <i64*> [#uses=1]
+
+define i32 @foo() nounwind readonly ssp {
+entry:
+  %0 = load i64* @x, align 8                      ; <i64> [#uses=1]
+  %1 = uitofp i64 %0 to double                    ; <double> [#uses=1]
+  %2 = fptosi double %1 to i32                    ; <i32> [#uses=1]
+  ret i32 %2
+}
diff --git a/test/CodeGen/X86/2010-05-26-DotDebugLoc.ll b/test/CodeGen/X86/2010-05-26-DotDebugLoc.ll
new file mode 100644
index 0000000..13f72a9
--- /dev/null
+++ b/test/CodeGen/X86/2010-05-26-DotDebugLoc.ll
@@ -0,0 +1,66 @@
+; RUN: llc -O2 < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin"
+
+%struct.a = type { i32, %struct.a* }
+
+@llvm.used = appending global [1 x i8*] [i8* bitcast (i8* (%struct.a*)* @bar to i8*)], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0]
+
+define i8* @bar(%struct.a* %myvar) nounwind optsize noinline ssp {
+entry:
+  tail call void @llvm.dbg.value(metadata !{%struct.a* %myvar}, i64 0, metadata !8)
+  %0 = getelementptr inbounds %struct.a* %myvar, i64 0, i32 0, !dbg !28 ; <i32*> [#uses=1]
+  %1 = load i32* %0, align 8, !dbg !28            ; <i32> [#uses=1]
+  tail call void @foo(i32 %1) nounwind optsize noinline ssp, !dbg !28
+  %2 = bitcast %struct.a* %myvar to i8*, !dbg !30 ; <i8*> [#uses=1]
+  ret i8* %2, !dbg !30
+}
+
+declare void @foo(i32) nounwind optsize noinline ssp
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+!llvm.dbg.gv = !{!0}
+!llvm.dbg.lv = !{!4, !8, !18, !25, !26}
+
+!0 = metadata !{i32 524340, i32 0, metadata !1, metadata !"ret", metadata !"ret", metadata !"", metadata !1, i32 7, metadata !3, i1 false, i1 true, null} ; [ DW_TAG_variable ]
+!1 = metadata !{i32 524329, metadata !"foo.c", metadata !"/tmp/", metadata !2} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 524305, i32 0, i32 1, metadata !"foo.c", metadata !"/tmp/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 524324, metadata !1, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!4 = metadata !{i32 524545, metadata !5, metadata !"x", metadata !1, i32 12, metadata !3} ; [ DW_TAG_arg_variable ]
+!5 = metadata !{i32 524334, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", metadata !1, i32 13, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true} ; [ DW_TAG_subprogram ]
+!6 = metadata !{i32 524309, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!7 = metadata !{null, metadata !3}
+!8 = metadata !{i32 524545, metadata !9, metadata !"myvar", metadata !1, i32 17, metadata !13} ; [ DW_TAG_arg_variable ]
+!9 = metadata !{i32 524334, i32 0, metadata !1, metadata !"bar", metadata !"bar", metadata !"bar", metadata !1, i32 17, metadata !10, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true} ; [ DW_TAG_subprogram ]
+!10 = metadata !{i32 524309, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!11 = metadata !{metadata !12, metadata !13}
+!12 = metadata !{i32 524303, metadata !1, metadata !"", metadata !1, i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
+!13 = metadata !{i32 524303, metadata !1, metadata !"", metadata !1, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !14} ; [ DW_TAG_pointer_type ]
+!14 = metadata !{i32 524307, metadata !1, metadata !"a", metadata !1, i32 2, i64 128, i64 64, i64 0, i32 0, null, metadata !15, i32 0, null} ; [ DW_TAG_structure_type ]
+!15 = metadata !{metadata !16, metadata !17}
+!16 = metadata !{i32 524301, metadata !14, metadata !"c", metadata !1, i32 3, i64 32, i64 32, i64 0, i32 0, metadata !3} ; [ DW_TAG_member ]
+!17 = metadata !{i32 524301, metadata !14, metadata !"d", metadata !1, i32 4, i64 64, i64 64, i64 64, i32 0, metadata !13} ; [ DW_TAG_member ]
+!18 = metadata !{i32 524545, metadata !19, metadata !"argc", metadata !1, i32 22, metadata !3} ; [ DW_TAG_arg_variable ]
+!19 = metadata !{i32 524334, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"main", metadata !1, i32 22, metadata !20, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true} ; [ DW_TAG_subprogram ]
+!20 = metadata !{i32 524309, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !21, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!21 = metadata !{metadata !3, metadata !3, metadata !22}
+!22 = metadata !{i32 524303, metadata !1, metadata !"", metadata !1, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !23} ; [ DW_TAG_pointer_type ]
+!23 = metadata !{i32 524303, metadata !1, metadata !"", metadata !1, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !24} ; [ DW_TAG_pointer_type ]
+!24 = metadata !{i32 524324, metadata !1, metadata !"char", metadata !1, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
+!25 = metadata !{i32 524545, metadata !19, metadata !"argv", metadata !1, i32 22, metadata !22} ; [ DW_TAG_arg_variable ]
+!26 = metadata !{i32 524544, metadata !27, metadata !"e", metadata !1, i32 23, metadata !14} ; [ DW_TAG_auto_variable ]
+!27 = metadata !{i32 524299, metadata !19, i32 22, i32 0} ; [ DW_TAG_lexical_block ]
+!28 = metadata !{i32 18, i32 0, metadata !29, null}
+!29 = metadata !{i32 524299, metadata !9, i32 17, i32 0} ; [ DW_TAG_lexical_block ]
+!30 = metadata !{i32 19, i32 0, metadata !29, null}
+
+; CHECK: Ldebug_loc0:
+; CHECK-NEXT: .quad   Lfunc_begin0
+; CHECK-NEXT: .quad   Ltmp3
+; CHECK-NEXT: .short  1
+; CHECK-NEXT: .byte   85
+; CHECK-NEXT: .quad   Ltmp3
+; CHECK-NEXT: .quad   Lfunc_end
+; CHECK-NEXT: .short  1
+; CHECK-NEXT: .byte   83
diff --git a/test/CodeGen/X86/2010-05-26-FP_TO_INT-crash.ll b/test/CodeGen/X86/2010-05-26-FP_TO_INT-crash.ll
new file mode 100644
index 0000000..38dcb80
--- /dev/null
+++ b/test/CodeGen/X86/2010-05-26-FP_TO_INT-crash.ll
@@ -0,0 +1,16 @@
+; RUN: llc -O0 -mcpu=i386 -mattr=-sse,-mmx < %s
+; ModuleID = '<stdin>'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
+target triple = "i386-pc-linux-gnu"
+
+module asm "\09.ident\09\22GCC: (GNU) 4.5.1 20100510 (prerelease) LLVM: 104604:104605\22"
+
+define i32 @f2(double %x) nounwind {
+entry:
+  %0 = load double* undef, align 64               ; <double> [#uses=1]
+  %1 = fptoui double %0 to i16                    ; <i16> [#uses=1]
+  %2 = zext i16 %1 to i32                         ; <i32> [#uses=1]
+  %3 = add nsw i32 0, %2                          ; <i32> [#uses=1]
+  store i32 %3, i32* undef, align 1
+  ret i32 0
+}
diff --git a/test/CodeGen/X86/call-imm.ll b/test/CodeGen/X86/call-imm.ll
index 87785bc..255adfb 100644
--- a/test/CodeGen/X86/call-imm.ll
+++ b/test/CodeGen/X86/call-imm.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=i386-darwin-apple -relocation-model=static | grep {call.*12345678}
-; RUN: llc < %s -mtriple=i386-darwin-apple -relocation-model=pic | not grep {call.*12345678}
+; RUN: llc < %s -mtriple=i386-apple-darwin -relocation-model=static | grep {call.*12345678}
+; RUN: llc < %s -mtriple=i386-apple-darwin -relocation-model=pic | not grep {call.*12345678}
 ; RUN: llc < %s -mtriple=i386-pc-linux -relocation-model=dynamic-no-pic | grep {call.*12345678}
 
 ; Call to immediate is not safe on x86-64 unless we *know* that the
diff --git a/test/CodeGen/X86/fast-cc-callee-pops.ll b/test/CodeGen/X86/fast-cc-callee-pops.ll
index 5e88ed7..ea10897 100644
--- a/test/CodeGen/X86/fast-cc-callee-pops.ll
+++ b/test/CodeGen/X86/fast-cc-callee-pops.ll
@@ -1,7 +1,13 @@
-; RUN: llc < %s -march=x86 -x86-asm-syntax=intel -mcpu=yonah | grep {ret	20}
+; RUN: llc < %s -march=x86 -x86-asm-syntax=intel -mcpu=yonah | FileCheck %s
 
 ; Check that a fastcc function pops its stack variables before returning.
 
 define x86_fastcallcc void @func(i64 %X, i64 %Y, float %G, double %Z) nounwind {
         ret void
+; CHECK: ret{{.*}}20
+}
+
+define x86_thiscallcc void @func2(i32 %X, i64 %Y, float %G, double %Z) nounwind {
+        ret void
+; CHECK: ret{{.*}}20
 }
diff --git a/test/CodeGen/X86/fast-cc-pass-in-regs.ll b/test/CodeGen/X86/fast-cc-pass-in-regs.ll
index fe96c0c..a96e504 100644
--- a/test/CodeGen/X86/fast-cc-pass-in-regs.ll
+++ b/test/CodeGen/X86/fast-cc-pass-in-regs.ll
@@ -1,15 +1,29 @@
-; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \
-; RUN:   grep {mov	EDX, 1}
+; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | FileCheck %s
 ; check that fastcc is passing stuff in regs.
 
 declare x86_fastcallcc i64 @callee(i64)
 
 define i64 @caller() {
         %X = call x86_fastcallcc  i64 @callee( i64 4294967299 )          ; <i64> [#uses=1]
+; CHECK: mov{{.*}}EDX, 1
         ret i64 %X
 }
 
 define x86_fastcallcc i64 @caller2(i64 %X) {
         ret i64 %X
+; CHECK: mov{{.*}}EAX, ECX
+}
+
+declare x86_thiscallcc i64 @callee2(i32)
+
+define i64 @caller3() {
+        %X = call x86_thiscallcc i64 @callee2( i32 3 )
+; CHECK: mov{{.*}}ECX, 3
+        ret i64 %X
+}
+
+define x86_thiscallcc i32 @caller4(i32 %X) {
+        ret i32 %X
+; CHECK: mov{{.*}}EAX, ECX
 }
 
diff --git a/test/CodeGen/X86/fp-stack-O0-crash.ll b/test/CodeGen/X86/fp-stack-O0-crash.ll
index 4768ea2..bbadca5 100644
--- a/test/CodeGen/X86/fp-stack-O0-crash.ll
+++ b/test/CodeGen/X86/fp-stack-O0-crash.ll
@@ -1,4 +1,5 @@
 ; RUN: llc %s -O0 -fast-isel -regalloc=local -o -
+; RUN: llc %s -O0 -fast-isel -regalloc=fast -o -
 ; PR4767
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
diff --git a/test/CodeGen/X86/fp-stack.ll b/test/CodeGen/X86/fp-stack.ll
new file mode 100644
index 0000000..dca644d
--- /dev/null
+++ b/test/CodeGen/X86/fp-stack.ll
@@ -0,0 +1,25 @@
+; RUN: llc %s -o - -mcpu=pentium
+; PR6828
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
+target triple = "i386-pc-linux-gnu"
+
+define void @foo() nounwind {
+entry:
+  %tmp6 = load x86_fp80* undef                       ; <x86_fp80> [#uses=2]
+  %tmp15 = load x86_fp80* undef                      ; <x86_fp80> [#uses=2]
+  %tmp24 = load x86_fp80* undef                      ; <x86_fp80> [#uses=1]
+  br i1 undef, label %return, label %bb.nph
+
+bb.nph:                                           ; preds = %entry
+  %cmp139 = fcmp ogt x86_fp80 %tmp15, %tmp6          ; <i1> [#uses=1]
+  %maxdiag.0 = select i1 %cmp139, x86_fp80 %tmp15, x86_fp80 %tmp6 ; <x86_fp80> [#uses=1]
+  %cmp139.1 = fcmp ogt x86_fp80 %tmp24, %maxdiag.0   ; <i1> [#uses=1]
+  br i1 %cmp139.1, label %sw.bb372, label %return
+
+sw.bb372:                                         ; preds = %for.end
+  ret void
+
+return:                                           ; preds = %for.end
+  ret void
+}
+
diff --git a/test/CodeGen/X86/label-redefinition.ll b/test/CodeGen/X86/label-redefinition.ll
new file mode 100644
index 0000000..9ad33e0
--- /dev/null
+++ b/test/CodeGen/X86/label-redefinition.ll
@@ -0,0 +1,15 @@
+; PR7054
+; RUN: not llc %s -o - |& grep {'_foo' label emitted multiple times to assembly}
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
+target triple = "i386-apple-darwin10.0.0"
+
+define i32 @"\01_foo"() {
+  unreachable
+}
+
+define i32 @foo() {
+entry:
+  unreachable
+}
+
+declare i32 @xstat64(i32, i8*, i8*)
diff --git a/test/CodeGen/X86/liveness-local-regalloc.ll b/test/CodeGen/X86/liveness-local-regalloc.ll
index 17e65d8..8cac3f8 100644
--- a/test/CodeGen/X86/liveness-local-regalloc.ll
+++ b/test/CodeGen/X86/liveness-local-regalloc.ll
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -O3 -regalloc=local -mtriple=x86_64-apple-darwin10
+; RUN: llc < %s -O3 -regalloc=fast -mtriple=x86_64-apple-darwin10
 ; <rdar://problem/7755473>
 
 %0 = type { i32, i8*, i8*, %1*, i8*, i64, i64, i32, i32, i32, i32, [1024 x i8] }
diff --git a/test/CodeGen/X86/lsr-delayed-fold.ll b/test/CodeGen/X86/lsr-delayed-fold.ll
index 17d6a4c..8afbb0d 100644
--- a/test/CodeGen/X86/lsr-delayed-fold.ll
+++ b/test/CodeGen/X86/lsr-delayed-fold.ll
@@ -49,3 +49,86 @@ lbl_264:                                          ; preds = %if.end, %lbl_264.pr
   %tobool12 = icmp eq i8 %mul.i18, 0              ; <i1> [#uses=1]
   unreachable
 }
+
+; LSR ends up going into conservative pruning mode; don't prune the solution
+; so far that it becomes unsolvable though.
+; PR7077
+
+%struct.Bu = type { i32, i32, i32 }
+
+define void @_Z3fooP2Bui(%struct.Bu* nocapture %bu) {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.inc131, %entry
+  %indvar = phi i64 [ %indvar.next, %for.inc131 ], [ 0, %entry ] ; <i64> [#uses=3]
+  br i1 undef, label %for.inc131, label %lor.lhs.false
+
+lor.lhs.false:                                    ; preds = %for.body
+  %tmp15 = add i64 %indvar, 1                     ; <i64> [#uses=1]
+  %tmp17 = add i64 %indvar, 2                      ; <i64> [#uses=1]
+  %tmp19 = add i64 %indvar, 3                      ; <i64> [#uses=1]
+  %tmp21 = add i64 %indvar, 4                      ; <i64> [#uses=1]
+  %tmp23 = add i64 %indvar, 5                      ; <i64> [#uses=1]
+  %tmp25 = add i64 %indvar, 6                      ; <i64> [#uses=1]
+  %tmp27 = add i64 %indvar, 7                      ; <i64> [#uses=1]
+  %tmp29 = add i64 %indvar, 8                      ; <i64> [#uses=1]
+  %tmp31 = add i64 %indvar, 9                      ; <i64> [#uses=1]
+  %tmp35 = add i64 %indvar, 11                     ; <i64> [#uses=1]
+  %tmp37 = add i64 %indvar, 12                     ; <i64> [#uses=1]
+  %tmp39 = add i64 %indvar, 13                     ; <i64> [#uses=1]
+  %tmp41 = add i64 %indvar, 14                     ; <i64> [#uses=1]
+  %tmp43 = add i64 %indvar, 15                     ; <i64> [#uses=1]
+  %tmp45 = add i64 %indvar, 16                     ; <i64> [#uses=1]
+  %tmp47 = add i64 %indvar, 17                     ; <i64> [#uses=1]
+  %mul = trunc i64 %indvar to i32                  ; <i32> [#uses=1]
+  %add22 = trunc i64 %tmp15 to i32                ; <i32> [#uses=1]
+  %add28 = trunc i64 %tmp17 to i32                ; <i32> [#uses=1]
+  %add34 = trunc i64 %tmp19 to i32                ; <i32> [#uses=1]
+  %add40 = trunc i64 %tmp21 to i32                ; <i32> [#uses=1]
+  %add46 = trunc i64 %tmp23 to i32                ; <i32> [#uses=1]
+  %add52 = trunc i64 %tmp25 to i32                ; <i32> [#uses=1]
+  %add58 = trunc i64 %tmp27 to i32                ; <i32> [#uses=1]
+  %add64 = trunc i64 %tmp29 to i32                ; <i32> [#uses=1]
+  %add70 = trunc i64 %tmp31 to i32                ; <i32> [#uses=1]
+  %add82 = trunc i64 %tmp35 to i32                ; <i32> [#uses=1]
+  %add88 = trunc i64 %tmp37 to i32                ; <i32> [#uses=1]
+  %add94 = trunc i64 %tmp39 to i32                ; <i32> [#uses=1]
+  %add100 = trunc i64 %tmp41 to i32               ; <i32> [#uses=1]
+  %add106 = trunc i64 %tmp43 to i32               ; <i32> [#uses=1]
+  %add112 = trunc i64 %tmp45 to i32               ; <i32> [#uses=1]
+  %add118 = trunc i64 %tmp47 to i32               ; <i32> [#uses=1]
+  %tmp10 = getelementptr %struct.Bu* %bu, i64 %indvar, i32 2 ; <i32*> [#uses=1]
+  %tmp11 = load i32* %tmp10                       ; <i32> [#uses=0]
+  tail call void undef(i32 %add22)
+  tail call void undef(i32 %add28)
+  tail call void undef(i32 %add34)
+  tail call void undef(i32 %add40)
+  tail call void undef(i32 %add46)
+  tail call void undef(i32 %add52)
+  tail call void undef(i32 %add58)
+  tail call void undef(i32 %add64)
+  tail call void undef(i32 %add70)
+  tail call void undef(i32 %add82)
+  tail call void undef(i32 %add88)
+  tail call void undef(i32 %add94)
+  tail call void undef(i32 %add100)
+  tail call void undef(i32 %add106)
+  tail call void undef(i32 %add112)
+  tail call void undef(i32 %add118)
+  br label %for.body123
+
+for.body123:                                      ; preds = %for.body123, %lor.lhs.false
+  %j.03 = phi i32 [ 0, %lor.lhs.false ], [ %inc, %for.body123 ] ; <i32> [#uses=2]
+  %add129 = add i32 %mul, %j.03                   ; <i32> [#uses=1]
+  tail call void undef(i32 %add129)
+  %inc = add nsw i32 %j.03, 1                     ; <i32> [#uses=1]
+  br i1 undef, label %for.inc131, label %for.body123
+
+for.inc131:                                       ; preds = %for.body123, %for.body
+  %indvar.next = add i64 %indvar, 1               ; <i64> [#uses=1]
+  br i1 undef, label %for.end134, label %for.body
+
+for.end134:                                       ; preds = %for.inc131
+  ret void
+}
diff --git a/test/CodeGen/X86/mcinst-lowering-cmp0.ll b/test/CodeGen/X86/mcinst-lowering-cmp0.ll
new file mode 100644
index 0000000..756be1f
--- /dev/null
+++ b/test/CodeGen/X86/mcinst-lowering-cmp0.ll
@@ -0,0 +1,68 @@
+; RUN: llc --show-mc-encoding -relocation-model=pic -disable-fp-elim -O3 < %s | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
+target triple = "i386-apple-darwin10.0.0"
+
+%struct.NSConstantString = type { i32*, i32, i8*, i32 }
+%struct._objc_module = type { i32, i32, i8*, %struct._objc_symtab* }
+%struct._objc_symtab = type { i32, i8*, i16, i16, [0 x i8*] }
+
+@"\01L_OBJC_IMAGE_INFO" = internal constant [2 x i32] [i32 0, i32 16], section "__OBJC, __image_info,regular" ; <[2 x i32]*> [#uses=1]
+@"\01L_OBJC_METH_VAR_NAME_" = internal global [4 x i8] c"foo\00", section "__TEXT,__cstring,cstring_literals", align 1 ; <[4 x i8]*> [#uses=1]
+@"\01L_OBJC_SELECTOR_REFERENCES_" = internal global i8* getelementptr inbounds ([4 x i8]* @"\01L_OBJC_METH_VAR_NAME_", i32 0, i32 0), section "__OBJC,__message_refs,literal_pointers,no_dead_strip", align 4 ; <i8**> [#uses=3]
+@__CFConstantStringClassReference = external global [0 x i32] ; <[0 x i32]*> [#uses=1]
+@.str = private constant [3 x i8] c"||\00"        ; <[3 x i8]*> [#uses=1]
+@_unnamed_cfstring_ = private constant %struct.NSConstantString { i32* getelementptr inbounds ([0 x i32]* @__CFConstantStringClassReference, i32 0, i32 0), i32 1992, i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i32 2 }, section "__DATA,__cfstring" ; <%struct.NSConstantString*> [#uses=1]
+@"\01L_OBJC_METH_VAR_NAME_1" = internal global [5 x i8] c"baz:\00", section "__TEXT,__cstring,cstring_literals", align 1 ; <[5 x i8]*> [#uses=1]
+@"\01L_OBJC_SELECTOR_REFERENCES_2" = internal global i8* getelementptr inbounds ([5 x i8]* @"\01L_OBJC_METH_VAR_NAME_1", i32 0, i32 0), section "__OBJC,__message_refs,literal_pointers,no_dead_strip", align 4 ; <i8**> [#uses=2]
+@"\01L_OBJC_METH_VAR_NAME_3" = internal global [4 x i8] c"bar\00", section "__TEXT,__cstring,cstring_literals", align 1 ; <[4 x i8]*> [#uses=1]
+@"\01L_OBJC_SELECTOR_REFERENCES_4" = internal global i8* getelementptr inbounds ([4 x i8]* @"\01L_OBJC_METH_VAR_NAME_3", i32 0, i32 0), section "__OBJC,__message_refs,literal_pointers,no_dead_strip", align 4 ; <i8**> [#uses=2]
+@"\01L_OBJC_CLASS_NAME_" = internal global [1 x i8] zeroinitializer, section "__TEXT,__cstring,cstring_literals", align 1 ; <[1 x i8]*> [#uses=1]
+@"\01L_OBJC_MODULES" = internal global %struct._objc_module { i32 7, i32 16, i8* getelementptr inbounds ([1 x i8]* @"\01L_OBJC_CLASS_NAME_", i32 0, i32 0), %struct._objc_symtab* null }, section "__OBJC,__module_info,regular,no_dead_strip", align 4 ; <%struct._objc_module*> [#uses=1]
+@llvm.used = appending global [9 x i8*] [i8* bitcast ([2 x i32]* @"\01L_OBJC_IMAGE_INFO" to i8*), i8* getelementptr inbounds ([4 x i8]* @"\01L_OBJC_METH_VAR_NAME_", i32 0, i32 0), i8* bitcast (i8** @"\01L_OBJC_SELECTOR_REFERENCES_" to i8*), i8* getelementptr inbounds ([5 x i8]* @"\01L_OBJC_METH_VAR_NAME_1", i32 0, i32 0), i8* bitcast (i8** @"\01L_OBJC_SELECTOR_REFERENCES_2" to i8*), i8* getelementptr inbounds ([4 x i8]* @"\01L_OBJC_METH_VAR_NAME_3", i32 0, i32 0), i8* bitcast (i8** @"\01L_OBJC_SELECTOR_REFERENCES_4" to i8*), i8* getelementptr inbounds ([1 x i8]* @"\01L_OBJC_CLASS_NAME_", i32 0, i32 0), i8* bitcast (%struct._objc_module* @"\01L_OBJC_MODULES" to i8*)], section "llvm.metadata" ; <[9 x i8*]*> [#uses=0]
+
+define void @f0(i8* nocapture %a, i8* nocapture %b) nounwind optsize ssp {
+entry:
+  %call = tail call i32 (...)* @get_name() nounwind optsize ; <i32> [#uses=2]
+  %conv = inttoptr i32 %call to i8*               ; <i8*> [#uses=1]
+  %call1 = tail call i32 (...)* @get_dict() nounwind optsize ; <i32> [#uses=2]
+  %conv2 = inttoptr i32 %call1 to i8*             ; <i8*> [#uses=2]
+
+; Check that we lower to the short form of cmpl, which has an 8-bit immediate.
+;
+; CHECK: cmpl  $0, -16(%ebp)           ## 4-byte Folded Reload
+; CHECK:                               ## encoding: [0x83,0x7d,0xf0,0x00]
+; rdar://7999130
+  %cmp = icmp eq i32 %call1, 0                    ; <i1> [#uses=1]
+  br i1 %cmp, label %if.end, label %if.then
+
+if.then:                                          ; preds = %entry
+  %tmp5 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_" ; <i8*> [#uses=1]
+  %call6 = tail call i8* (i8*, i8*, ...)* @objc_msgSend(i8* %conv2, i8* %tmp5) nounwind optsize ; <i8*> [#uses=1]
+  %tmp7 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_2" ; <i8*> [#uses=1]
+  %call820 = tail call i8* (i8*, i8*, ...)* @objc_msgSend(i8* %call6, i8* %tmp7, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring_ to i8*)) nounwind optsize ; <i8*> [#uses=0]
+  br label %if.end
+
+if.end:                                           ; preds = %entry, %if.then
+  %tmp10 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_" ; <i8*> [#uses=1]
+  %call11 = tail call i8* (i8*, i8*, ...)* @objc_msgSend(i8* %conv2, i8* %tmp10) nounwind optsize ; <i8*> [#uses=1]
+  %tmp12 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_4" ; <i8*> [#uses=1]
+  %call13 = tail call i8* (i8*, i8*, ...)* @objc_msgSend(i8* %call11, i8* %tmp12) nounwind optsize ; <i8*> [#uses=0]
+  %cmp15 = icmp eq i32 %call, 0                   ; <i1> [#uses=1]
+  br i1 %cmp15, label %if.end19, label %if.then17
+
+if.then17:                                        ; preds = %if.end
+  tail call void (...)* @f1(i8* %conv) nounwind optsize
+  ret void
+
+if.end19:                                         ; preds = %if.end
+  ret void
+}
+
+declare i32 @get_name(...) optsize
+
+declare i32 @get_dict(...) optsize
+
+declare i8* @objc_msgSend(i8*, i8*, ...)
+
+declare void @f1(...) optsize
diff --git a/test/CodeGen/X86/mcinst-lowering.ll b/test/CodeGen/X86/mcinst-lowering.ll
new file mode 100644
index 0000000..1ef5a97
--- /dev/null
+++ b/test/CodeGen/X86/mcinst-lowering.ll
@@ -0,0 +1,26 @@
+; RUN: llc --show-mc-encoding < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0.0"
+
+define i32 @f0(i32* nocapture %x) nounwind readonly ssp {
+entry:
+  %tmp1 = load i32* %x                            ; <i32> [#uses=2]
+  %tobool = icmp eq i32 %tmp1, 0                  ; <i1> [#uses=1]
+  br i1 %tobool, label %if.end, label %return
+
+if.end:                                           ; preds = %entry
+
+; Check that we lower to the short form of cmpl, which has a fixed %eax
+; register.
+;
+; CHECK: cmpl $16777216, %eax
+; CHECK: # encoding: [0x3d,0x00,0x00,0x00,0x01]
+  %cmp = icmp eq i32 %tmp1, 16777216              ; <i1> [#uses=1]
+
+  %conv = zext i1 %cmp to i32                     ; <i32> [#uses=1]
+  ret i32 %conv
+
+return:                                           ; preds = %entry
+  ret i32 0
+}
diff --git a/test/CodeGen/X86/sse-align-11.ll b/test/CodeGen/X86/sse-align-11.ll
index aa1b437..3cc83ca 100644
--- a/test/CodeGen/X86/sse-align-11.ll
+++ b/test/CodeGen/X86/sse-align-11.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -march=x86 -mcpu=yonah -mtriple=i686-apple-darwin8 | grep movaps
-; RUN: llc < %s -march=x86 -mcpu=yonah -mtriple=linux | grep movups
+; RUN: llc < %s -march=x86 -mcpu=yonah -mtriple=i686-linux-gnu | grep movups
 
 define <4 x float> @foo(float %a, float %b, float %c, float %d) nounwind {
 entry:
diff --git a/test/CodeGen/X86/stack-color-with-reg-2.ll b/test/CodeGen/X86/stack-color-with-reg-2.ll
deleted file mode 100644
index c1f2672..0000000
--- a/test/CodeGen/X86/stack-color-with-reg-2.ll
+++ /dev/null
@@ -1,230 +0,0 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin10 -relocation-model=pic -disable-fp-elim -color-ss-with-regs | grep {movl\[\[:space:\]\]%eax, %ebx}
-
-	%"struct..0$_67" = type { i32, %"struct.llvm::MachineOperand"**, %"struct.llvm::MachineOperand"* }
-	%"struct..1$_69" = type { i32 }
-	%"struct.llvm::AbstractTypeUser" = type { i32 (...)** }
-	%"struct.llvm::AliasAnalysis" = type opaque
-	%"struct.llvm::AnalysisResolver" = type { %"struct.std::vector<std::pair<const llvm::PassInfo*, llvm::Pass*>,std::allocator<std::pair<const llvm::PassInfo*, llvm::Pass*> > >", %"struct.llvm::PMDataManager"* }
-	%"struct.llvm::Annotable" = type { %"struct.llvm::Annotation"* }
-	%"struct.llvm::Annotation" = type { i32 (...)**, %"struct..1$_69", %"struct.llvm::Annotation"* }
-	%"struct.llvm::Argument" = type { %"struct.llvm::Value", %"struct.llvm::ilist_node<llvm::Argument>", %"struct.llvm::Function"* }
-	%"struct.llvm::AttrListPtr" = type { %"struct.llvm::AttributeListImpl"* }
-	%"struct.llvm::AttributeListImpl" = type opaque
-	%"struct.llvm::BasicBlock" = type { %"struct.llvm::Value", %"struct.llvm::ilist_node<llvm::BasicBlock>", %"struct.llvm::iplist<llvm::Instruction,llvm::ilist_traits<llvm::Instruction> >", %"struct.llvm::Function"* }
-	%"struct.llvm::BitVector" = type { i32*, i32, i32 }
-	%"struct.llvm::BumpPtrAllocator" = type { i8* }
-	%"struct.llvm::CalleeSavedInfo" = type { i32, %"struct.llvm::TargetRegisterClass"*, i32 }
-	%"struct.llvm::Constant" = type { %"struct.llvm::User" }
-	%"struct.llvm::DebugLocTracker" = type { %"struct.std::vector<llvm::DebugLocTuple,std::allocator<llvm::DebugLocTuple> >", %"struct.llvm::DenseMap<llvm::DebugLocTuple,unsigned int,llvm::DenseMapInfo<llvm::DebugLocTuple>,llvm::DenseMapInfo<unsigned int> >" }
-	%"struct.llvm::DebugLocTuple" = type { %"struct.llvm::GlobalVariable"*, i32, i32 }
-	%"struct.llvm::DenseMap<llvm::DebugLocTuple,unsigned int,llvm::DenseMapInfo<llvm::DebugLocTuple>,llvm::DenseMapInfo<unsigned int> >" = type { i32, %"struct.std::pair<llvm::DebugLocTuple,unsigned int>"*, i32, i32 }
-	%"struct.llvm::DenseMap<llvm::MachineInstr*,unsigned int,llvm::DenseMapInfo<llvm::MachineInstr*>,llvm::DenseMapInfo<unsigned int> >" = type { i32, %"struct.std::pair<llvm::MachineInstr*,unsigned int>"*, i32, i32 }
-	%"struct.llvm::DenseMap<unsigned int,char,llvm::DenseMapInfo<unsigned int>,llvm::DenseMapInfo<char> >" = type { i32, %"struct.std::pair<unsigned int,char>"*, i32, i32 }
-	%"struct.llvm::DenseMap<unsigned int,llvm::LiveInterval*,llvm::DenseMapInfo<unsigned int>,llvm::DenseMapInfo<llvm::LiveInterval*> >" = type { i32, %"struct.std::pair<unsigned int,llvm::LiveInterval*>"*, i32, i32 }
-	%"struct.llvm::DenseSet<unsigned int,llvm::DenseMapInfo<unsigned int> >" = type { %"struct.llvm::DenseMap<unsigned int,char,llvm::DenseMapInfo<unsigned int>,llvm::DenseMapInfo<char> >" }
-	%"struct.llvm::Function" = type { %"struct.llvm::GlobalValue", %"struct.llvm::Annotable", %"struct.llvm::ilist_node<llvm::Function>", %"struct.llvm::iplist<llvm::BasicBlock,llvm::ilist_traits<llvm::BasicBlock> >", %"struct.llvm::iplist<llvm::Argument,llvm::ilist_traits<llvm::Argument> >", %"struct.llvm::ValueSymbolTable"*, %"struct.llvm::AttrListPtr" }
-	%"struct.llvm::FunctionPass" = type { %"struct.llvm::Pass" }
-	%"struct.llvm::GlobalValue" = type { %"struct.llvm::Constant", %"struct.llvm::Module"*, i32, %"struct.std::string" }
-	%"struct.llvm::GlobalVariable" = type opaque
-	%"struct.llvm::Instruction" = type { %"struct.llvm::User", %"struct.llvm::ilist_node<llvm::Instruction>", %"struct.llvm::BasicBlock"* }
-	%"struct.llvm::LiveInterval" = type <{ i32, float, i16, [6 x i8], %"struct.llvm::SmallVector<llvm::LiveRange,4u>", %"struct.llvm::SmallVector<llvm::MachineBasicBlock*,4u>" }>
-	%"struct.llvm::LiveIntervals" = type { %"struct.llvm::MachineFunctionPass", %"struct.llvm::MachineFunction"*, %"struct.llvm::MachineRegisterInfo"*, %"struct.llvm::TargetMachine"*, %"struct.llvm::TargetRegisterInfo"*, %"struct.llvm::TargetInstrInfo"*, %"struct.llvm::AliasAnalysis"*, %"struct.llvm::LiveVariables"*, %"struct.llvm::BumpPtrAllocator", %"struct.std::vector<std::pair<unsigned int, unsigned int>,std::allocator<std::pair<unsigned int, unsigned int> > >", %"struct.std::vector<std::pair<unsigned int, llvm::MachineBasicBlock*>,std::allocator<std::pair<unsigned int, llvm::MachineBasicBlock*> > >", i64, %"struct.llvm::DenseMap<llvm::MachineInstr*,unsigned int,llvm::DenseMapInfo<llvm::MachineInstr*>,llvm::DenseMapInfo<unsigned int> >", %"struct.std::vector<llvm::MachineInstr*,std::allocator<llvm::MachineInstr*> >", %"struct.llvm::DenseMap<unsigned int,llvm::LiveInterval*,llvm::DenseMapInfo<unsigned int>,llvm::DenseMapInfo<llvm::LiveInterval*> >", %"struct.llvm::BitVector", %"struct.std::vector<llvm::MachineInstr*,std::allocator<llvm::MachineInstr*> >" }
-	%"struct.llvm::LiveVariables" = type opaque
-	%"struct.llvm::MVT" = type { %"struct..1$_69" }
-	%"struct.llvm::MachineBasicBlock" = type { %"struct.llvm::ilist_node<llvm::MachineBasicBlock>", %"struct.llvm::ilist<llvm::MachineInstr>", %"struct.llvm::BasicBlock"*, i32, %"struct.llvm::MachineFunction"*, %"struct.std::vector<llvm::MachineBasicBlock*,std::allocator<llvm::MachineBasicBlock*> >", %"struct.std::vector<llvm::MachineBasicBlock*,std::allocator<llvm::MachineBasicBlock*> >", %"struct.std::vector<int,std::allocator<int> >", i32, i8 }
-	%"struct.llvm::MachineConstantPool" = type opaque
-	%"struct.llvm::MachineFrameInfo" = type { %"struct.std::vector<llvm::MachineFrameInfo::StackObject,std::allocator<llvm::MachineFrameInfo::StackObject> >", i32, i8, i8, i64, i32, i32, i8, i32, i32, %"struct.std::vector<llvm::CalleeSavedInfo,std::allocator<llvm::CalleeSavedInfo> >", %"struct.llvm::MachineModuleInfo"*, %"struct.llvm::TargetFrameInfo"* }
-	%"struct.llvm::MachineFrameInfo::StackObject" = type { i64, i32, i8, i64 }
-	%"struct.llvm::MachineFunction" = type { %"struct.llvm::Annotation", %"struct.llvm::Function"*, %"struct.llvm::TargetMachine"*, %"struct.llvm::MachineRegisterInfo"*, %"struct.llvm::AbstractTypeUser"*, %"struct.llvm::MachineFrameInfo"*, %"struct.llvm::MachineConstantPool"*, %"struct.llvm::MachineJumpTableInfo"*, %"struct.std::vector<llvm::MachineBasicBlock*,std::allocator<llvm::MachineBasicBlock*> >", %"struct.llvm::BumpPtrAllocator", %"struct.llvm::Recycler<llvm::MachineBasicBlock,80ul,4ul>", %"struct.llvm::Recycler<llvm::MachineBasicBlock,80ul,4ul>", %"struct.llvm::ilist<llvm::MachineBasicBlock>", %"struct..1$_69", %"struct.llvm::DebugLocTracker" }
-	%"struct.llvm::MachineFunctionPass" = type { %"struct.llvm::FunctionPass" }
-	%"struct.llvm::MachineInstr" = type { %"struct.llvm::ilist_node<llvm::MachineInstr>", %"struct.llvm::TargetInstrDesc"*, i16, %"struct.std::vector<llvm::MachineOperand,std::allocator<llvm::MachineOperand> >", %"struct.std::list<llvm::MachineMemOperand,std::allocator<llvm::MachineMemOperand> >", %"struct.llvm::MachineBasicBlock"*, %"struct..1$_69" }
-	%"struct.llvm::MachineJumpTableInfo" = type opaque
-	%"struct.llvm::MachineModuleInfo" = type opaque
-	%"struct.llvm::MachineOperand" = type { i8, i8, i8, %"struct.llvm::MachineInstr"*, %"struct.llvm::MachineOperand::$_66" }
-	%"struct.llvm::MachineOperand::$_66" = type { %"struct..0$_67" }
-	%"struct.llvm::MachineRegisterInfo" = type { %"struct.std::vector<std::pair<const llvm::TargetRegisterClass*, llvm::MachineOperand*>,std::allocator<std::pair<const llvm::TargetRegisterClass*, llvm::MachineOperand*> > >", %"struct.std::vector<std::vector<unsigned int, std::allocator<unsigned int> >,std::allocator<std::vector<unsigned int, std::allocator<unsigned int> > > >", %"struct.llvm::MachineOperand"**, %"struct.llvm::BitVector", %"struct.std::vector<std::pair<unsigned int, unsigned int>,std::allocator<std::pair<unsigned int, unsigned int> > >", %"struct.std::vector<int,std::allocator<int> >" }
-	%"struct.llvm::Module" = type opaque
-	%"struct.llvm::PATypeHandle" = type { %"struct.llvm::Type"*, %"struct.llvm::AbstractTypeUser"* }
-	%"struct.llvm::PATypeHolder" = type { %"struct.llvm::Type"* }
-	%"struct.llvm::PMDataManager" = type opaque
-	%"struct.llvm::Pass" = type { i32 (...)**, %"struct.llvm::AnalysisResolver"*, i32 }
-	%"struct.llvm::PassInfo" = type { i8*, i8*, i32, i8, i8, i8, %"struct.std::vector<const llvm::PassInfo*,std::allocator<const llvm::PassInfo*> >", %"struct.llvm::Pass"* ()* }
-	%"struct.llvm::Recycler<llvm::MachineBasicBlock,80ul,4ul>" = type { %"struct.llvm::iplist<llvm::RecyclerStruct,llvm::ilist_traits<llvm::RecyclerStruct> >" }
-	%"struct.llvm::RecyclerStruct" = type { %"struct.llvm::RecyclerStruct"*, %"struct.llvm::RecyclerStruct"* }
-	%"struct.llvm::SmallVector<llvm::LiveRange,4u>" = type <{ [17 x i8], [47 x i8] }>
-	%"struct.llvm::SmallVector<llvm::MachineBasicBlock*,4u>" = type <{ [17 x i8], [15 x i8] }>
-	%"struct.llvm::TargetAsmInfo" = type opaque
-	%"struct.llvm::TargetFrameInfo" = type opaque
-	%"struct.llvm::TargetInstrDesc" = type { i16, i16, i16, i16, i8*, i32, i32, i32*, i32*, %"struct.llvm::TargetRegisterClass"**, %"struct.llvm::TargetOperandInfo"* }
-	%"struct.llvm::TargetInstrInfo" = type { i32 (...)**, %"struct.llvm::TargetInstrDesc"*, i32 }
-	%"struct.llvm::TargetMachine" = type { i32 (...)**, %"struct.llvm::TargetAsmInfo"* }
-	%"struct.llvm::TargetOperandInfo" = type { i16, i16, i32 }
-	%"struct.llvm::TargetRegisterClass" = type { i32 (...)**, i32, i8*, %"struct.llvm::MVT"*, %"struct.llvm::TargetRegisterClass"**, %"struct.llvm::TargetRegisterClass"**, %"struct.llvm::TargetRegisterClass"**, %"struct.llvm::TargetRegisterClass"**, i32, i32, i32, i32*, i32*, %"struct.llvm::DenseSet<unsigned int,llvm::DenseMapInfo<unsigned int> >" }
-	%"struct.llvm::TargetRegisterDesc" = type { i8*, i8*, i32*, i32*, i32* }
-	%"struct.llvm::TargetRegisterInfo" = type { i32 (...)**, i32*, i32, i32*, i32, i32*, i32, %"struct.llvm::TargetRegisterDesc"*, i32, %"struct.llvm::TargetRegisterClass"**, %"struct.llvm::TargetRegisterClass"**, i32, i32 }
-	%"struct.llvm::Type" = type { %"struct.llvm::AbstractTypeUser", i8, [3 x i8], i32, %"struct.llvm::Type"*, %"struct.std::vector<llvm::AbstractTypeUser*,std::allocator<llvm::AbstractTypeUser*> >", i32, %"struct.llvm::PATypeHandle"* }
-	%"struct.llvm::Use" = type { %"struct.llvm::Value"*, %"struct.llvm::Use"*, %"struct..1$_69" }
-	%"struct.llvm::User" = type { %"struct.llvm::Value", %"struct.llvm::Use"*, i32 }
-	%"struct.llvm::Value" = type { i32 (...)**, i8, i8, i16, %"struct.llvm::PATypeHolder", %"struct.llvm::Use"*, %"struct.llvm::ValueName"* }
-	%"struct.llvm::ValueName" = type opaque
-	%"struct.llvm::ValueSymbolTable" = type opaque
-	%"struct.llvm::ilist<llvm::MachineBasicBlock>" = type { %"struct.llvm::iplist<llvm::MachineBasicBlock,llvm::ilist_traits<llvm::MachineBasicBlock> >" }
-	%"struct.llvm::ilist<llvm::MachineInstr>" = type { %"struct.llvm::iplist<llvm::MachineInstr,llvm::ilist_traits<llvm::MachineInstr> >" }
-	%"struct.llvm::ilist_node<llvm::Argument>" = type { %"struct.llvm::Argument"*, %"struct.llvm::Argument"* }
-	%"struct.llvm::ilist_node<llvm::BasicBlock>" = type { %"struct.llvm::BasicBlock"*, %"struct.llvm::BasicBlock"* }
-	%"struct.llvm::ilist_node<llvm::Function>" = type { %"struct.llvm::Function"*, %"struct.llvm::Function"* }
-	%"struct.llvm::ilist_node<llvm::Instruction>" = type { %"struct.llvm::Instruction"*, %"struct.llvm::Instruction"* }
-	%"struct.llvm::ilist_node<llvm::MachineBasicBlock>" = type { %"struct.llvm::MachineBasicBlock"*, %"struct.llvm::MachineBasicBlock"* }
-	%"struct.llvm::ilist_node<llvm::MachineInstr>" = type { %"struct.llvm::MachineInstr"*, %"struct.llvm::MachineInstr"* }
-	%"struct.llvm::ilist_traits<llvm::Argument>" = type { %"struct.llvm::ilist_node<llvm::Argument>" }
-	%"struct.llvm::ilist_traits<llvm::BasicBlock>" = type { %"struct.llvm::ilist_node<llvm::BasicBlock>" }
-	%"struct.llvm::ilist_traits<llvm::Instruction>" = type { %"struct.llvm::ilist_node<llvm::Instruction>" }
-	%"struct.llvm::ilist_traits<llvm::MachineBasicBlock>" = type { %"struct.llvm::ilist_node<llvm::MachineBasicBlock>" }
-	%"struct.llvm::ilist_traits<llvm::MachineInstr>" = type { %"struct.llvm::ilist_node<llvm::MachineInstr>", %"struct.llvm::MachineBasicBlock"* }
-	%"struct.llvm::ilist_traits<llvm::RecyclerStruct>" = type { %"struct.llvm::RecyclerStruct" }
-	%"struct.llvm::iplist<llvm::Argument,llvm::ilist_traits<llvm::Argument> >" = type { %"struct.llvm::ilist_traits<llvm::Argument>", %"struct.llvm::Argument"* }
-	%"struct.llvm::iplist<llvm::BasicBlock,llvm::ilist_traits<llvm::BasicBlock> >" = type { %"struct.llvm::ilist_traits<llvm::BasicBlock>", %"struct.llvm::BasicBlock"* }
-	%"struct.llvm::iplist<llvm::Instruction,llvm::ilist_traits<llvm::Instruction> >" = type { %"struct.llvm::ilist_traits<llvm::Instruction>", %"struct.llvm::Instruction"* }
-	%"struct.llvm::iplist<llvm::MachineBasicBlock,llvm::ilist_traits<llvm::MachineBasicBlock> >" = type { %"struct.llvm::ilist_traits<llvm::MachineBasicBlock>", %"struct.llvm::MachineBasicBlock"* }
-	%"struct.llvm::iplist<llvm::MachineInstr,llvm::ilist_traits<llvm::MachineInstr> >" = type { %"struct.llvm::ilist_traits<llvm::MachineInstr>", %"struct.llvm::MachineInstr"* }
-	%"struct.llvm::iplist<llvm::RecyclerStruct,llvm::ilist_traits<llvm::RecyclerStruct> >" = type { %"struct.llvm::ilist_traits<llvm::RecyclerStruct>", %"struct.llvm::RecyclerStruct"* }
-	%"struct.std::IdxMBBPair" = type { i32, %"struct.llvm::MachineBasicBlock"* }
-	%"struct.std::_List_base<llvm::MachineMemOperand,std::allocator<llvm::MachineMemOperand> >" = type { %"struct.llvm::ilist_traits<llvm::RecyclerStruct>" }
-	%"struct.std::_Vector_base<const llvm::PassInfo*,std::allocator<const llvm::PassInfo*> >" = type { %"struct.std::_Vector_base<const llvm::PassInfo*,std::allocator<const llvm::PassInfo*> >::_Vector_impl" }
-	%"struct.std::_Vector_base<const llvm::PassInfo*,std::allocator<const llvm::PassInfo*> >::_Vector_impl" = type { %"struct.llvm::PassInfo"**, %"struct.llvm::PassInfo"**, %"struct.llvm::PassInfo"** }
-	%"struct.std::_Vector_base<int,std::allocator<int> >" = type { %"struct.std::_Vector_base<int,std::allocator<int> >::_Vector_impl" }
-	%"struct.std::_Vector_base<int,std::allocator<int> >::_Vector_impl" = type { i32*, i32*, i32* }
-	%"struct.std::_Vector_base<llvm::AbstractTypeUser*,std::allocator<llvm::AbstractTypeUser*> >" = type { %"struct.std::_Vector_base<llvm::AbstractTypeUser*,std::allocator<llvm::AbstractTypeUser*> >::_Vector_impl" }
-	%"struct.std::_Vector_base<llvm::AbstractTypeUser*,std::allocator<llvm::AbstractTypeUser*> >::_Vector_impl" = type { %"struct.llvm::AbstractTypeUser"**, %"struct.llvm::AbstractTypeUser"**, %"struct.llvm::AbstractTypeUser"** }
-	%"struct.std::_Vector_base<llvm::CalleeSavedInfo,std::allocator<llvm::CalleeSavedInfo> >" = type { %"struct.std::_Vector_base<llvm::CalleeSavedInfo,std::allocator<llvm::CalleeSavedInfo> >::_Vector_impl" }
-	%"struct.std::_Vector_base<llvm::CalleeSavedInfo,std::allocator<llvm::CalleeSavedInfo> >::_Vector_impl" = type { %"struct.llvm::CalleeSavedInfo"*, %"struct.llvm::CalleeSavedInfo"*, %"struct.llvm::CalleeSavedInfo"* }
-	%"struct.std::_Vector_base<llvm::DebugLocTuple,std::allocator<llvm::DebugLocTuple> >" = type { %"struct.std::_Vector_base<llvm::DebugLocTuple,std::allocator<llvm::DebugLocTuple> >::_Vector_impl" }
-	%"struct.std::_Vector_base<llvm::DebugLocTuple,std::allocator<llvm::DebugLocTuple> >::_Vector_impl" = type { %"struct.llvm::DebugLocTuple"*, %"struct.llvm::DebugLocTuple"*, %"struct.llvm::DebugLocTuple"* }
-	%"struct.std::_Vector_base<llvm::MachineBasicBlock*,std::allocator<llvm::MachineBasicBlock*> >" = type { %"struct.std::_Vector_base<llvm::MachineBasicBlock*,std::allocator<llvm::MachineBasicBlock*> >::_Vector_impl" }
-	%"struct.std::_Vector_base<llvm::MachineBasicBlock*,std::allocator<llvm::MachineBasicBlock*> >::_Vector_impl" = type { %"struct.llvm::MachineBasicBlock"**, %"struct.llvm::MachineBasicBlock"**, %"struct.llvm::MachineBasicBlock"** }
-	%"struct.std::_Vector_base<llvm::MachineFrameInfo::StackObject,std::allocator<llvm::MachineFrameInfo::StackObject> >" = type { %"struct.std::_Vector_base<llvm::MachineFrameInfo::StackObject,std::allocator<llvm::MachineFrameInfo::StackObject> >::_Vector_impl" }
-	%"struct.std::_Vector_base<llvm::MachineFrameInfo::StackObject,std::allocator<llvm::MachineFrameInfo::StackObject> >::_Vector_impl" = type { %"struct.llvm::MachineFrameInfo::StackObject"*, %"struct.llvm::MachineFrameInfo::StackObject"*, %"struct.llvm::MachineFrameInfo::StackObject"* }
-	%"struct.std::_Vector_base<llvm::MachineInstr*,std::allocator<llvm::MachineInstr*> >" = type { %"struct.std::_Vector_base<llvm::MachineInstr*,std::allocator<llvm::MachineInstr*> >::_Vector_impl" }
-	%"struct.std::_Vector_base<llvm::MachineInstr*,std::allocator<llvm::MachineInstr*> >::_Vector_impl" = type { %"struct.llvm::MachineInstr"**, %"struct.llvm::MachineInstr"**, %"struct.llvm::MachineInstr"** }
-	%"struct.std::_Vector_base<llvm::MachineOperand,std::allocator<llvm::MachineOperand> >" = type { %"struct.std::_Vector_base<llvm::MachineOperand,std::allocator<llvm::MachineOperand> >::_Vector_impl" }
-	%"struct.std::_Vector_base<llvm::MachineOperand,std::allocator<llvm::MachineOperand> >::_Vector_impl" = type { %"struct.llvm::MachineOperand"*, %"struct.llvm::MachineOperand"*, %"struct.llvm::MachineOperand"* }
-	%"struct.std::_Vector_base<std::pair<const llvm::PassInfo*, llvm::Pass*>,std::allocator<std::pair<const llvm::PassInfo*, llvm::Pass*> > >" = type { %"struct.std::_Vector_base<std::pair<const llvm::PassInfo*, llvm::Pass*>,std::allocator<std::pair<const llvm::PassInfo*, llvm::Pass*> > >::_Vector_impl" }
-	%"struct.std::_Vector_base<std::pair<const llvm::PassInfo*, llvm::Pass*>,std::allocator<std::pair<const llvm::PassInfo*, llvm::Pass*> > >::_Vector_impl" = type { %"struct.std::pair<const llvm::PassInfo*,llvm::Pass*>"*, %"struct.std::pair<const llvm::PassInfo*,llvm::Pass*>"*, %"struct.std::pair<const llvm::PassInfo*,llvm::Pass*>"* }
-	%"struct.std::_Vector_base<std::pair<const llvm::TargetRegisterClass*, llvm::MachineOperand*>,std::allocator<std::pair<const llvm::TargetRegisterClass*, llvm::MachineOperand*> > >" = type { %"struct.std::_Vector_base<std::pair<const llvm::TargetRegisterClass*, llvm::MachineOperand*>,std::allocator<std::pair<const llvm::TargetRegisterClass*, llvm::MachineOperand*> > >::_Vector_impl" }
-	%"struct.std::_Vector_base<std::pair<const llvm::TargetRegisterClass*, llvm::MachineOperand*>,std::allocator<std::pair<const llvm::TargetRegisterClass*, llvm::MachineOperand*> > >::_Vector_impl" = type { %"struct.std::pair<const llvm::TargetRegisterClass*,llvm::MachineOperand*>"*, %"struct.std::pair<const llvm::TargetRegisterClass*,llvm::MachineOperand*>"*, %"struct.std::pair<const llvm::TargetRegisterClass*,llvm::MachineOperand*>"* }
-	%"struct.std::_Vector_base<std::pair<unsigned int, llvm::MachineBasicBlock*>,std::allocator<std::pair<unsigned int, llvm::MachineBasicBlock*> > >" = type { %"struct.std::_Vector_base<std::pair<unsigned int, llvm::MachineBasicBlock*>,std::allocator<std::pair<unsigned int, llvm::MachineBasicBlock*> > >::_Vector_impl" }
-	%"struct.std::_Vector_base<std::pair<unsigned int, llvm::MachineBasicBlock*>,std::allocator<std::pair<unsigned int, llvm::MachineBasicBlock*> > >::_Vector_impl" = type { %"struct.std::IdxMBBPair"*, %"struct.std::IdxMBBPair"*, %"struct.std::IdxMBBPair"* }
-	%"struct.std::_Vector_base<std::pair<unsigned int, unsigned int>,std::allocator<std::pair<unsigned int, unsigned int> > >" = type { %"struct.std::_Vector_base<std::pair<unsigned int, unsigned int>,std::allocator<std::pair<unsigned int, unsigned int> > >::_Vector_impl" }
-	%"struct.std::_Vector_base<std::pair<unsigned int, unsigned int>,std::allocator<std::pair<unsigned int, unsigned int> > >::_Vector_impl" = type { %"struct.std::pair<unsigned int,int>"*, %"struct.std::pair<unsigned int,int>"*, %"struct.std::pair<unsigned int,int>"* }
-	%"struct.std::_Vector_base<std::vector<unsigned int, std::allocator<unsigned int> >,std::allocator<std::vector<unsigned int, std::allocator<unsigned int> > > >" = type { %"struct.std::_Vector_base<std::vector<unsigned int, std::allocator<unsigned int> >,std::allocator<std::vector<unsigned int, std::allocator<unsigned int> > > >::_Vector_impl" }
-	%"struct.std::_Vector_base<std::vector<unsigned int, std::allocator<unsigned int> >,std::allocator<std::vector<unsigned int, std::allocator<unsigned int> > > >::_Vector_impl" = type { %"struct.std::vector<int,std::allocator<int> >"*, %"struct.std::vector<int,std::allocator<int> >"*, %"struct.std::vector<int,std::allocator<int> >"* }
-	%"struct.std::list<llvm::MachineMemOperand,std::allocator<llvm::MachineMemOperand> >" = type { %"struct.std::_List_base<llvm::MachineMemOperand,std::allocator<llvm::MachineMemOperand> >" }
-	%"struct.std::pair<const llvm::PassInfo*,llvm::Pass*>" = type { %"struct.llvm::PassInfo"*, %"struct.llvm::Pass"* }
-	%"struct.std::pair<const llvm::TargetRegisterClass*,llvm::MachineOperand*>" = type { %"struct.llvm::TargetRegisterClass"*, %"struct.llvm::MachineOperand"* }
-	%"struct.std::pair<llvm::DebugLocTuple,unsigned int>" = type { %"struct.llvm::DebugLocTuple", i32 }
-	%"struct.std::pair<llvm::MachineInstr*,unsigned int>" = type { %"struct.llvm::MachineInstr"*, i32 }
-	%"struct.std::pair<unsigned int,char>" = type { i32, i8 }
-	%"struct.std::pair<unsigned int,int>" = type { i32, i32 }
-	%"struct.std::pair<unsigned int,llvm::LiveInterval*>" = type { i32, %"struct.llvm::LiveInterval"* }
-	%"struct.std::string" = type { %"struct.llvm::BumpPtrAllocator" }
-	%"struct.std::vector<const llvm::PassInfo*,std::allocator<const llvm::PassInfo*> >" = type { %"struct.std::_Vector_base<const llvm::PassInfo*,std::allocator<const llvm::PassInfo*> >" }
-	%"struct.std::vector<int,std::allocator<int> >" = type { %"struct.std::_Vector_base<int,std::allocator<int> >" }
-	%"struct.std::vector<llvm::AbstractTypeUser*,std::allocator<llvm::AbstractTypeUser*> >" = type { %"struct.std::_Vector_base<llvm::AbstractTypeUser*,std::allocator<llvm::AbstractTypeUser*> >" }
-	%"struct.std::vector<llvm::CalleeSavedInfo,std::allocator<llvm::CalleeSavedInfo> >" = type { %"struct.std::_Vector_base<llvm::CalleeSavedInfo,std::allocator<llvm::CalleeSavedInfo> >" }
-	%"struct.std::vector<llvm::DebugLocTuple,std::allocator<llvm::DebugLocTuple> >" = type { %"struct.std::_Vector_base<llvm::DebugLocTuple,std::allocator<llvm::DebugLocTuple> >" }
-	%"struct.std::vector<llvm::MachineBasicBlock*,std::allocator<llvm::MachineBasicBlock*> >" = type { %"struct.std::_Vector_base<llvm::MachineBasicBlock*,std::allocator<llvm::MachineBasicBlock*> >" }
-	%"struct.std::vector<llvm::MachineFrameInfo::StackObject,std::allocator<llvm::MachineFrameInfo::StackObject> >" = type { %"struct.std::_Vector_base<llvm::MachineFrameInfo::StackObject,std::allocator<llvm::MachineFrameInfo::StackObject> >" }
-	%"struct.std::vector<llvm::MachineInstr*,std::allocator<llvm::MachineInstr*> >" = type { %"struct.std::_Vector_base<llvm::MachineInstr*,std::allocator<llvm::MachineInstr*> >" }
-	%"struct.std::vector<llvm::MachineOperand,std::allocator<llvm::MachineOperand> >" = type { %"struct.std::_Vector_base<llvm::MachineOperand,std::allocator<llvm::MachineOperand> >" }
-	%"struct.std::vector<std::pair<const llvm::PassInfo*, llvm::Pass*>,std::allocator<std::pair<const llvm::PassInfo*, llvm::Pass*> > >" = type { %"struct.std::_Vector_base<std::pair<const llvm::PassInfo*, llvm::Pass*>,std::allocator<std::pair<const llvm::PassInfo*, llvm::Pass*> > >" }
-	%"struct.std::vector<std::pair<const llvm::TargetRegisterClass*, llvm::MachineOperand*>,std::allocator<std::pair<const llvm::TargetRegisterClass*, llvm::MachineOperand*> > >" = type { %"struct.std::_Vector_base<std::pair<const llvm::TargetRegisterClass*, llvm::MachineOperand*>,std::allocator<std::pair<const llvm::TargetRegisterClass*, llvm::MachineOperand*> > >" }
-	%"struct.std::vector<std::pair<unsigned int, llvm::MachineBasicBlock*>,std::allocator<std::pair<unsigned int, llvm::MachineBasicBlock*> > >" = type { %"struct.std::_Vector_base<std::pair<unsigned int, llvm::MachineBasicBlock*>,std::allocator<std::pair<unsigned int, llvm::MachineBasicBlock*> > >" }
-	%"struct.std::vector<std::pair<unsigned int, unsigned int>,std::allocator<std::pair<unsigned int, unsigned int> > >" = type { %"struct.std::_Vector_base<std::pair<unsigned int, unsigned int>,std::allocator<std::pair<unsigned int, unsigned int> > >" }
-	%"struct.std::vector<std::vector<unsigned int, std::allocator<unsigned int> >,std::allocator<std::vector<unsigned int, std::allocator<unsigned int> > > >" = type { %"struct.std::_Vector_base<std::vector<unsigned int, std::allocator<unsigned int> >,std::allocator<std::vector<unsigned int, std::allocator<unsigned int> > > >" }
-@_ZZNK4llvm8DenseMapIPNS_12MachineInstrEjNS_12DenseMapInfoIS2_EENS3_IjEEE15LookupBucketForERKS2_RPSt4pairIS2_jEE8__func__ = external constant [16 x i8]		; <[16 x i8]*> [#uses=1]
-@"\01LC6" = external constant [56 x i8]		; <[56 x i8]*> [#uses=1]
-@"\01LC7" = external constant [134 x i8]		; <[134 x i8]*> [#uses=1]
-@"\01LC8" = external constant [72 x i8]		; <[72 x i8]*> [#uses=1]
-@_ZZN4llvm13LiveIntervals24InsertMachineInstrInMapsEPNS_12MachineInstrEjE8__func__ = external constant [25 x i8]		; <[25 x i8]*> [#uses=1]
-@"\01LC51" = external constant [42 x i8]		; <[42 x i8]*> [#uses=1]
-
-define void @_ZN4llvm13LiveIntervals24InsertMachineInstrInMapsEPNS_12MachineInstrEj(%"struct.llvm::LiveIntervals"* nocapture %this, %"struct.llvm::MachineInstr"* %MI, i32 %Index) nounwind ssp {
-entry:
-	%0 = call i64 @_ZN4llvm8DenseMapIPNS_12MachineInstrEjNS_12DenseMapInfoIS2_EENS3_IjEEE4findERKS2_(%"struct.llvm::DenseMap<llvm::MachineInstr*,unsigned int,llvm::DenseMapInfo<llvm::MachineInstr*>,llvm::DenseMapInfo<unsigned int> >"* null, %"struct.llvm::MachineInstr"** null) nounwind ssp		; <i64> [#uses=1]
-	%1 = trunc i64 %0 to i32		; <i32> [#uses=1]
-	%tmp11 = inttoptr i32 %1 to %"struct.std::pair<llvm::MachineInstr*,unsigned int>"*		; <%"struct.std::pair<llvm::MachineInstr*,unsigned int>"*> [#uses=1]
-	%2 = load %"struct.std::pair<llvm::MachineInstr*,unsigned int>"** null, align 4		; <%"struct.std::pair<llvm::MachineInstr*,unsigned int>"*> [#uses=3]
-	%3 = getelementptr %"struct.llvm::LiveIntervals"* %this, i32 0, i32 12, i32 0		; <i32*> [#uses=1]
-	%4 = load i32* %3, align 4		; <i32> [#uses=2]
-	%5 = getelementptr %"struct.std::pair<llvm::MachineInstr*,unsigned int>"* %2, i32 %4		; <%"struct.std::pair<llvm::MachineInstr*,unsigned int>"*> [#uses=1]
-	br label %bb1.i.i.i
-
-bb.i.i.i:		; preds = %bb2.i.i.i
-	%indvar.next = add i32 %indvar, 1		; <i32> [#uses=1]
-	br label %bb1.i.i.i
-
-bb1.i.i.i:		; preds = %bb.i.i.i, %entry
-	%indvar = phi i32 [ 0, %entry ], [ %indvar.next, %bb.i.i.i ]		; <i32> [#uses=2]
-	%tmp32 = shl i32 %indvar, 3		; <i32> [#uses=1]
-	%ctg2.sum = add i32 0, %tmp32		; <i32> [#uses=1]
-	%ctg237 = getelementptr i8* null, i32 %ctg2.sum		; <i8*> [#uses=1]
-	%.0.0.i = bitcast i8* %ctg237 to %"struct.std::pair<llvm::MachineInstr*,unsigned int>"*		; <%"struct.std::pair<llvm::MachineInstr*,unsigned int>"*> [#uses=2]
-	%6 = icmp eq %"struct.std::pair<llvm::MachineInstr*,unsigned int>"* %.0.0.i, %5		; <i1> [#uses=1]
-	br i1 %6, label %_ZN4llvm8DenseMapIPNS_12MachineInstrEjNS_12DenseMapInfoIS2_EENS3_IjEEE3endEv.exit, label %bb2.i.i.i
-
-bb2.i.i.i:		; preds = %bb1.i.i.i
-	%7 = load %"struct.llvm::MachineInstr"** null, align 4		; <%"struct.llvm::MachineInstr"*> [#uses=1]
-	%8 = icmp eq %"struct.llvm::MachineInstr"* %7, inttoptr (i32 -8 to %"struct.llvm::MachineInstr"*)		; <i1> [#uses=1]
-	%or.cond.i.i.i21 = or i1 false, %8		; <i1> [#uses=1]
-	br i1 %or.cond.i.i.i21, label %bb.i.i.i, label %_ZN4llvm8DenseMapIPNS_12MachineInstrEjNS_12DenseMapInfoIS2_EENS3_IjEEE3endEv.exit
-
-_ZN4llvm8DenseMapIPNS_12MachineInstrEjNS_12DenseMapInfoIS2_EENS3_IjEEE3endEv.exit:		; preds = %bb2.i.i.i, %bb1.i.i.i
-	%9 = icmp eq %"struct.std::pair<llvm::MachineInstr*,unsigned int>"* %tmp11, %.0.0.i		; <i1> [#uses=1]
-	br i1 %9, label %bb7, label %bb6
-
-bb6:		; preds = %_ZN4llvm8DenseMapIPNS_12MachineInstrEjNS_12DenseMapInfoIS2_EENS3_IjEEE3endEv.exit
-	call void @__assert_rtn(i8* getelementptr ([25 x i8]* @_ZZN4llvm13LiveIntervals24InsertMachineInstrInMapsEPNS_12MachineInstrEjE8__func__, i32 0, i32 0), i8* getelementptr ([72 x i8]* @"\01LC8", i32 0, i32 0), i32 251, i8* getelementptr ([42 x i8]* @"\01LC51", i32 0, i32 0)) noreturn nounwind
-	unreachable
-
-bb7:		; preds = %_ZN4llvm8DenseMapIPNS_12MachineInstrEjNS_12DenseMapInfoIS2_EENS3_IjEEE3endEv.exit
-	%10 = load %"struct.llvm::MachineInstr"** null, align 4		; <%"struct.llvm::MachineInstr"*> [#uses=2]
-	%11 = icmp eq %"struct.llvm::MachineInstr"* %10, inttoptr (i32 -8 to %"struct.llvm::MachineInstr"*)		; <i1> [#uses=1]
-	%or.cond40.i.i.i = or i1 false, %11		; <i1> [#uses=1]
-	br i1 %or.cond40.i.i.i, label %bb5.i.i.i, label %bb6.preheader.i.i.i
-
-bb6.preheader.i.i.i:		; preds = %bb7
-	%12 = add i32 %4, -1		; <i32> [#uses=1]
-	br label %bb6.i.i.i
-
-bb5.i.i.i:		; preds = %bb7
-	call void @__assert_rtn(i8* getelementptr ([16 x i8]* @_ZZNK4llvm8DenseMapIPNS_12MachineInstrEjNS_12DenseMapInfoIS2_EENS3_IjEEE15LookupBucketForERKS2_RPSt4pairIS2_jEE8__func__, i32 0, i32 0), i8* getelementptr ([56 x i8]* @"\01LC6", i32 0, i32 0), i32 390, i8* getelementptr ([134 x i8]* @"\01LC7", i32 0, i32 0)) noreturn nounwind
-	unreachable
-
-bb6.i.i.i:		; preds = %bb17.i.i.i, %bb6.preheader.i.i.i
-	%FoundTombstone.1.i.i.i = phi %"struct.std::pair<llvm::MachineInstr*,unsigned int>"* [ %FoundTombstone.0.i.i.i, %bb17.i.i.i ], [ null, %bb6.preheader.i.i.i ]		; <%"struct.std::pair<llvm::MachineInstr*,unsigned int>"*> [#uses=2]
-	%ProbeAmt.0.i.i.i = phi i32 [ 0, %bb17.i.i.i ], [ 1, %bb6.preheader.i.i.i ]		; <i32> [#uses=1]
-	%BucketNo.0.i.i.i = phi i32 [ %20, %bb17.i.i.i ], [ 0, %bb6.preheader.i.i.i ]		; <i32> [#uses=2]
-	%13 = and i32 %BucketNo.0.i.i.i, %12		; <i32> [#uses=2]
-	%14 = getelementptr %"struct.std::pair<llvm::MachineInstr*,unsigned int>"* %2, i32 %13		; <%"struct.std::pair<llvm::MachineInstr*,unsigned int>"*> [#uses=2]
-	%15 = getelementptr %"struct.std::pair<llvm::MachineInstr*,unsigned int>"* %2, i32 %13, i32 0		; <%"struct.llvm::MachineInstr"**> [#uses=1]
-	%16 = load %"struct.llvm::MachineInstr"** %15, align 4		; <%"struct.llvm::MachineInstr"*> [#uses=2]
-	%17 = icmp eq %"struct.llvm::MachineInstr"* %16, %10		; <i1> [#uses=1]
-	br i1 %17, label %_ZN4llvm8DenseMapIPNS_12MachineInstrEjNS_12DenseMapInfoIS2_EENS3_IjEEEixERKS2_.exit, label %bb17.i.i.i
-
-bb17.i.i.i:		; preds = %bb6.i.i.i
-	%18 = icmp eq %"struct.llvm::MachineInstr"* %16, inttoptr (i32 -8 to %"struct.llvm::MachineInstr"*)		; <i1> [#uses=1]
-	%19 = icmp eq %"struct.std::pair<llvm::MachineInstr*,unsigned int>"* %FoundTombstone.1.i.i.i, null		; <i1> [#uses=1]
-	%or.cond.i.i.i = and i1 %18, %19		; <i1> [#uses=1]
-	%FoundTombstone.0.i.i.i = select i1 %or.cond.i.i.i, %"struct.std::pair<llvm::MachineInstr*,unsigned int>"* %14, %"struct.std::pair<llvm::MachineInstr*,unsigned int>"* %FoundTombstone.1.i.i.i		; <%"struct.std::pair<llvm::MachineInstr*,unsigned int>"*> [#uses=1]
-	%20 = add i32 %BucketNo.0.i.i.i, %ProbeAmt.0.i.i.i		; <i32> [#uses=1]
-	br label %bb6.i.i.i
-
-_ZN4llvm8DenseMapIPNS_12MachineInstrEjNS_12DenseMapInfoIS2_EENS3_IjEEEixERKS2_.exit:		; preds = %bb6.i.i.i
-	%21 = getelementptr %"struct.std::pair<llvm::MachineInstr*,unsigned int>"* %14, i32 0, i32 1		; <i32*> [#uses=1]
-	store i32 %Index, i32* %21, align 4
-	ret void
-}
-
-declare void @__assert_rtn(i8*, i8*, i32, i8*) noreturn
-
-declare i64 @_ZN4llvm8DenseMapIPNS_12MachineInstrEjNS_12DenseMapInfoIS2_EENS3_IjEEE4findERKS2_(%"struct.llvm::DenseMap<llvm::MachineInstr*,unsigned int,llvm::DenseMapInfo<llvm::MachineInstr*>,llvm::DenseMapInfo<unsigned int> >"* nocapture, %"struct.llvm::MachineInstr"** nocapture) nounwind ssp
diff --git a/test/CodeGen/X86/tls-1.ll b/test/CodeGen/X86/tls-1.ll
new file mode 100644
index 0000000..5f6cbe0
--- /dev/null
+++ b/test/CodeGen/X86/tls-1.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -mtriple x86_64-apple-darwin | FileCheck %s
+
+@a = thread_local global i32 0                    ; <i32*> [#uses=0]
+@b = thread_local global i32 0                    ; <i32*> [#uses=0]
+
+; CHECK: .tbss _a$tlv$init, 4, 2
+; CHECK:        .section        __DATA,__thread_vars,thread_local_variables
+; CHECK:        .globl  _a
+; CHECK: _a:
+; CHECK:        .quad   ___tlv_bootstrap
+; CHECK:        .quad   0
+; CHECK:        .quad   _a$tlv$init
+
+; CHECK: .tbss _b$tlv$init, 4, 2
+; CHECK:        .globl  _b
+; CHECK: _b:
+; CHECK:        .quad   ___tlv_bootstrap
+; CHECK:        .quad   0
+; CHECK:        .quad   _b$tlv$init
diff --git a/test/CodeGen/X86/unknown-location.ll b/test/CodeGen/X86/unknown-location.ll
new file mode 100644
index 0000000..fa98b78
--- /dev/null
+++ b/test/CodeGen/X86/unknown-location.ll
@@ -0,0 +1,34 @@
+; RUN: llc < %s -asm-verbose=false -march=x86-64 -use-unknown-locations | FileCheck %s
+
+; The divide instruction does not have a debug location. CodeGen should
+; represent this in the debug information. This is checked by a check
+; for a label between the code for the add and the code for the divide,
+; which indicates that the add's location doesn't spill over unto the
+; divide.
+
+;      CHECK:         leal    (%rdi,%rsi), %eax
+; CHECK-NEXT: Ltmp
+; CHECK-NEXT:         cltd
+; CHECK-NEXT:         idivl   %r8d
+; CHECK-NEXT: Ltmp
+; CHECK-NEXT:         addl    %ecx, %eax
+; CHECK-NEXT:         ret
+; CHECK-NEXT: Ltmp
+
+define i32 @foo(i32 %w, i32 %x, i32 %y, i32 %z) nounwind {
+entry:
+  %a = add  i32 %w, %x, !dbg !8
+  %b = sdiv i32 %a, %y
+  %c = add  i32 %b, %z, !dbg !8
+  ret i32 %c, !dbg !8
+}
+
+!0 = metadata !{i32 524545, metadata !1, metadata !"x", metadata !2, i32 1, metadata !6} ; [ DW_TAG_arg_variable ]
+!1 = metadata !{i32 524334, i32 0, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", metadata !2, i32 1, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false} ; [ DW_TAG_subprogram ]
+!2 = metadata !{i32 524329, metadata !"test.c", metadata !"/dir", metadata !3} ; [ DW_TAG_file_type ]
+!3 = metadata !{i32 524305, i32 0, i32 12, metadata !"test.c", metadata !".", metadata !"producer", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!4 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!5 = metadata !{metadata !6}
+!6 = metadata !{i32 524324, metadata !2, metadata !"int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!7 = metadata !{i32 524299, metadata !1, i32 1, i32 30} ; [ DW_TAG_lexical_block ]
+!8 = metadata !{i32 4, i32 3, metadata !7, null}
diff --git a/test/DebugInfo/2010-05-10-MultipleCU.ll b/test/DebugInfo/2010-05-10-MultipleCU.ll
new file mode 100644
index 0000000..3b24c0c
--- /dev/null
+++ b/test/DebugInfo/2010-05-10-MultipleCU.ll
@@ -0,0 +1,30 @@
+; RUN: llc -O0 -asm-verbose  %s -o %t
+; RUN: grep DW_TAG_compile_unit %t | count 3
+; One for a.c, second one for b.c and third one for abbrev.
+
+define i32 @foo() nounwind readnone ssp {
+return:
+  ret i32 42, !dbg !0
+}
+
+define i32 @bar() nounwind readnone ssp {
+return:
+  ret i32 21, !dbg !8
+}
+
+!0 = metadata !{i32 3, i32 0, metadata !1, null}
+!1 = metadata !{i32 524299, metadata !2, i32 2, i32 0} ; [ DW_TAG_lexical_block ]
+!2 = metadata !{i32 524334, i32 0, metadata !3, metadata !"foo", metadata !"foo", metadata !"foo", metadata !3, i32 2, metadata !5, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false} ; [ DW_TAG_subprogram ]
+!3 = metadata !{i32 524329, metadata !"a.c", metadata !"/tmp/", metadata !4} ; [ DW_TAG_file_type ]
+!4 = metadata !{i32 524305, i32 0, i32 1, metadata !"a.c", metadata !"/tmp/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!5 = metadata !{i32 524309, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!6 = metadata !{metadata !7}
+!7 = metadata !{i32 524324, metadata !3, metadata !"int", metadata !3, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!8 = metadata !{i32 3, i32 0, metadata !9, null}
+!9 = metadata !{i32 524299, metadata !10, i32 2, i32 0} ; [ DW_TAG_lexical_block ]
+!10 = metadata !{i32 524334, i32 0, metadata !11, metadata !"bar", metadata !"bar", metadata !"bar", metadata !11, i32 2, metadata !13, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false} ; [ DW_TAG_subprogram ]
+!11 = metadata !{i32 524329, metadata !"b.c", metadata !"/tmp/", metadata !12} ; [ DW_TAG_file_type ]
+!12 = metadata !{i32 524305, i32 0, i32 1, metadata !"b.c", metadata !"/tmp/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!13 = metadata !{i32 524309, metadata !11, metadata !"", metadata !11, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !14, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!14 = metadata !{metadata !15}
+!15 = metadata !{i32 524324, metadata !11, metadata !"int", metadata !11, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
diff --git a/test/DebugInfo/2010-05-25-DotDebugLoc.ll b/test/DebugInfo/2010-05-25-DotDebugLoc.ll
new file mode 100644
index 0000000..4c477ee
--- /dev/null
+++ b/test/DebugInfo/2010-05-25-DotDebugLoc.ll
@@ -0,0 +1,239 @@
+; RUN: llc -O2 < %s | grep debug_loc12
+; Test to check .debug_loc support. This test case emits 13 debug_loc entries.
+
+%0 = type { double }
+
+define hidden %0 @__divsc3(float %a, float %b, float %c, float %d) nounwind readnone {
+entry:
+  tail call void @llvm.dbg.value(metadata !{float %a}, i64 0, metadata !0)
+  tail call void @llvm.dbg.value(metadata !{float %b}, i64 0, metadata !11)
+  tail call void @llvm.dbg.value(metadata !{float %c}, i64 0, metadata !12)
+  tail call void @llvm.dbg.value(metadata !{float %d}, i64 0, metadata !13)
+  %0 = tail call float @fabsf(float %c) nounwind readnone, !dbg !19 ; <float> [#uses=1]
+  %1 = tail call float @fabsf(float %d) nounwind readnone, !dbg !19 ; <float> [#uses=1]
+  %2 = fcmp olt float %0, %1, !dbg !19            ; <i1> [#uses=1]
+  br i1 %2, label %bb, label %bb1, !dbg !19
+
+bb:                                               ; preds = %entry
+  %3 = fdiv float %c, %d, !dbg !20                ; <float> [#uses=3]
+  tail call void @llvm.dbg.value(metadata !{float %3}, i64 0, metadata !16), !dbg !20
+  %4 = fmul float %3, %c, !dbg !21                ; <float> [#uses=1]
+  %5 = fadd float %4, %d, !dbg !21                ; <float> [#uses=2]
+  tail call void @llvm.dbg.value(metadata !{float %5}, i64 0, metadata !14), !dbg !21
+  %6 = fmul float %3, %a, !dbg !22                ; <float> [#uses=1]
+  %7 = fadd float %6, %b, !dbg !22                ; <float> [#uses=1]
+  %8 = fdiv float %7, %5, !dbg !22                ; <float> [#uses=1]
+  tail call void @llvm.dbg.value(metadata !{float %8}, i64 0, metadata !17), !dbg !22
+  %9 = fmul float %3, %b, !dbg !23                ; <float> [#uses=1]
+  %10 = fsub float %9, %a, !dbg !23               ; <float> [#uses=1]
+  %11 = fdiv float %10, %5, !dbg !23              ; <float> [#uses=1]
+  tail call void @llvm.dbg.value(metadata !{float %11}, i64 0, metadata !18), !dbg !23
+  br label %bb2, !dbg !23
+
+bb1:                                              ; preds = %entry
+  %12 = fdiv float %d, %c, !dbg !24               ; <float> [#uses=3]
+  tail call void @llvm.dbg.value(metadata !{float %12}, i64 0, metadata !16), !dbg !24
+  %13 = fmul float %12, %d, !dbg !25              ; <float> [#uses=1]
+  %14 = fadd float %13, %c, !dbg !25              ; <float> [#uses=2]
+  tail call void @llvm.dbg.value(metadata !{float %14}, i64 0, metadata !14), !dbg !25
+  %15 = fmul float %12, %b, !dbg !26              ; <float> [#uses=1]
+  %16 = fadd float %15, %a, !dbg !26              ; <float> [#uses=1]
+  %17 = fdiv float %16, %14, !dbg !26             ; <float> [#uses=1]
+  tail call void @llvm.dbg.value(metadata !{float %17}, i64 0, metadata !17), !dbg !26
+  %18 = fmul float %12, %a, !dbg !27              ; <float> [#uses=1]
+  %19 = fsub float %b, %18, !dbg !27              ; <float> [#uses=1]
+  %20 = fdiv float %19, %14, !dbg !27             ; <float> [#uses=1]
+  tail call void @llvm.dbg.value(metadata !{float %20}, i64 0, metadata !18), !dbg !27
+  br label %bb2, !dbg !27
+
+bb2:                                              ; preds = %bb1, %bb
+  %y.0 = phi float [ %11, %bb ], [ %20, %bb1 ]    ; <float> [#uses=5]
+  %x.0 = phi float [ %8, %bb ], [ %17, %bb1 ]     ; <float> [#uses=5]
+  %21 = fcmp uno float %x.0, 0.000000e+00, !dbg !28 ; <i1> [#uses=1]
+  %22 = fcmp uno float %y.0, 0.000000e+00, !dbg !28 ; <i1> [#uses=1]
+  %or.cond = and i1 %21, %22                      ; <i1> [#uses=1]
+  br i1 %or.cond, label %bb4, label %bb46, !dbg !28
+
+bb4:                                              ; preds = %bb2
+  %23 = fcmp une float %c, 0.000000e+00, !dbg !29 ; <i1> [#uses=1]
+  %24 = fcmp une float %d, 0.000000e+00, !dbg !29 ; <i1> [#uses=1]
+  %or.cond93 = or i1 %23, %24                     ; <i1> [#uses=1]
+  br i1 %or.cond93, label %bb9, label %bb6, !dbg !29
+
+bb6:                                              ; preds = %bb4
+  %25 = fcmp uno float %a, 0.000000e+00, !dbg !29 ; <i1> [#uses=1]
+  %26 = fcmp uno float %b, 0.000000e+00, !dbg !29 ; <i1> [#uses=1]
+  %or.cond94 = and i1 %25, %26                    ; <i1> [#uses=1]
+  br i1 %or.cond94, label %bb9, label %bb8, !dbg !29
+
+bb8:                                              ; preds = %bb6
+  %27 = tail call float @copysignf(float 0x7FF0000000000000, float %c) nounwind readnone, !dbg !30 ; <float> [#uses=2]
+  %28 = fmul float %27, %a, !dbg !30              ; <float> [#uses=1]
+  tail call void @llvm.dbg.value(metadata !{float %28}, i64 0, metadata !17), !dbg !30
+  %29 = fmul float %27, %b, !dbg !31              ; <float> [#uses=1]
+  tail call void @llvm.dbg.value(metadata !{float %29}, i64 0, metadata !18), !dbg !31
+  br label %bb46, !dbg !31
+
+bb9:                                              ; preds = %bb6, %bb4
+  %30 = fcmp ord float %a, 0.000000e+00           ; <i1> [#uses=1]
+  %31 = fsub float %a, %a, !dbg !32               ; <float> [#uses=3]
+  %32 = fcmp uno float %31, 0.000000e+00          ; <i1> [#uses=1]
+  %33 = and i1 %30, %32, !dbg !32                 ; <i1> [#uses=2]
+  br i1 %33, label %bb14, label %bb11, !dbg !32
+
+bb11:                                             ; preds = %bb9
+  %34 = fcmp ord float %b, 0.000000e+00           ; <i1> [#uses=1]
+  %35 = fsub float %b, %b, !dbg !32               ; <float> [#uses=1]
+  %36 = fcmp uno float %35, 0.000000e+00          ; <i1> [#uses=1]
+  %37 = and i1 %34, %36, !dbg !32                 ; <i1> [#uses=1]
+  br i1 %37, label %bb14, label %bb27, !dbg !32
+
+bb14:                                             ; preds = %bb11, %bb9
+  %38 = fsub float %c, %c, !dbg !32               ; <float> [#uses=1]
+  %39 = fcmp ord float %38, 0.000000e+00          ; <i1> [#uses=1]
+  br i1 %39, label %bb15, label %bb27, !dbg !32
+
+bb15:                                             ; preds = %bb14
+  %40 = fsub float %d, %d, !dbg !32               ; <float> [#uses=1]
+  %41 = fcmp ord float %40, 0.000000e+00          ; <i1> [#uses=1]
+  br i1 %41, label %bb16, label %bb27, !dbg !32
+
+bb16:                                             ; preds = %bb15
+  %iftmp.0.0 = select i1 %33, float 1.000000e+00, float 0.000000e+00 ; <float> [#uses=1]
+  %42 = tail call float @copysignf(float %iftmp.0.0, float %a) nounwind readnone, !dbg !33 ; <float> [#uses=2]
+  tail call void @llvm.dbg.value(metadata !{float %42}, i64 0, metadata !0), !dbg !33
+  %43 = fcmp ord float %b, 0.000000e+00           ; <i1> [#uses=1]
+  %44 = fsub float %b, %b, !dbg !34               ; <float> [#uses=1]
+  %45 = fcmp uno float %44, 0.000000e+00          ; <i1> [#uses=1]
+  %46 = and i1 %43, %45, !dbg !34                 ; <i1> [#uses=1]
+  %iftmp.1.0 = select i1 %46, float 1.000000e+00, float 0.000000e+00 ; <float> [#uses=1]
+  %47 = tail call float @copysignf(float %iftmp.1.0, float %b) nounwind readnone, !dbg !34 ; <float> [#uses=2]
+  tail call void @llvm.dbg.value(metadata !{float %47}, i64 0, metadata !11), !dbg !34
+  %48 = fmul float %42, %c, !dbg !35              ; <float> [#uses=1]
+  %49 = fmul float %47, %d, !dbg !35              ; <float> [#uses=1]
+  %50 = fadd float %48, %49, !dbg !35             ; <float> [#uses=1]
+  %51 = fmul float %50, 0x7FF0000000000000, !dbg !35 ; <float> [#uses=1]
+  tail call void @llvm.dbg.value(metadata !{float %51}, i64 0, metadata !17), !dbg !35
+  %52 = fmul float %47, %c, !dbg !36              ; <float> [#uses=1]
+  %53 = fmul float %42, %d, !dbg !36              ; <float> [#uses=1]
+  %54 = fsub float %52, %53, !dbg !36             ; <float> [#uses=1]
+  %55 = fmul float %54, 0x7FF0000000000000, !dbg !36 ; <float> [#uses=1]
+  tail call void @llvm.dbg.value(metadata !{float %55}, i64 0, metadata !18), !dbg !36
+  br label %bb46, !dbg !36
+
+bb27:                                             ; preds = %bb15, %bb14, %bb11
+  %56 = fcmp ord float %c, 0.000000e+00           ; <i1> [#uses=1]
+  %57 = fsub float %c, %c, !dbg !37               ; <float> [#uses=1]
+  %58 = fcmp uno float %57, 0.000000e+00          ; <i1> [#uses=1]
+  %59 = and i1 %56, %58, !dbg !37                 ; <i1> [#uses=2]
+  br i1 %59, label %bb33, label %bb30, !dbg !37
+
+bb30:                                             ; preds = %bb27
+  %60 = fcmp ord float %d, 0.000000e+00           ; <i1> [#uses=1]
+  %61 = fsub float %d, %d, !dbg !37               ; <float> [#uses=1]
+  %62 = fcmp uno float %61, 0.000000e+00          ; <i1> [#uses=1]
+  %63 = and i1 %60, %62, !dbg !37                 ; <i1> [#uses=1]
+  %64 = fcmp ord float %31, 0.000000e+00          ; <i1> [#uses=1]
+  %or.cond95 = and i1 %63, %64                    ; <i1> [#uses=1]
+  br i1 %or.cond95, label %bb34, label %bb46, !dbg !37
+
+bb33:                                             ; preds = %bb27
+  %.old = fcmp ord float %31, 0.000000e+00        ; <i1> [#uses=1]
+  br i1 %.old, label %bb34, label %bb46, !dbg !37
+
+bb34:                                             ; preds = %bb33, %bb30
+  %65 = fsub float %b, %b, !dbg !37               ; <float> [#uses=1]
+  %66 = fcmp ord float %65, 0.000000e+00          ; <i1> [#uses=1]
+  br i1 %66, label %bb35, label %bb46, !dbg !37
+
+bb35:                                             ; preds = %bb34
+  %iftmp.2.0 = select i1 %59, float 1.000000e+00, float 0.000000e+00 ; <float> [#uses=1]
+  %67 = tail call float @copysignf(float %iftmp.2.0, float %c) nounwind readnone, !dbg !38 ; <float> [#uses=2]
+  tail call void @llvm.dbg.value(metadata !{float %67}, i64 0, metadata !12), !dbg !38
+  %68 = fcmp ord float %d, 0.000000e+00           ; <i1> [#uses=1]
+  %69 = fsub float %d, %d, !dbg !39               ; <float> [#uses=1]
+  %70 = fcmp uno float %69, 0.000000e+00          ; <i1> [#uses=1]
+  %71 = and i1 %68, %70, !dbg !39                 ; <i1> [#uses=1]
+  %iftmp.3.0 = select i1 %71, float 1.000000e+00, float 0.000000e+00 ; <float> [#uses=1]
+  %72 = tail call float @copysignf(float %iftmp.3.0, float %d) nounwind readnone, !dbg !39 ; <float> [#uses=2]
+  tail call void @llvm.dbg.value(metadata !{float %72}, i64 0, metadata !13), !dbg !39
+  %73 = fmul float %67, %a, !dbg !40              ; <float> [#uses=1]
+  %74 = fmul float %72, %b, !dbg !40              ; <float> [#uses=1]
+  %75 = fadd float %73, %74, !dbg !40             ; <float> [#uses=1]
+  %76 = fmul float %75, 0.000000e+00, !dbg !40    ; <float> [#uses=1]
+  tail call void @llvm.dbg.value(metadata !{float %76}, i64 0, metadata !17), !dbg !40
+  %77 = fmul float %67, %b, !dbg !41              ; <float> [#uses=1]
+  %78 = fmul float %72, %a, !dbg !41              ; <float> [#uses=1]
+  %79 = fsub float %77, %78, !dbg !41             ; <float> [#uses=1]
+  %80 = fmul float %79, 0.000000e+00, !dbg !41    ; <float> [#uses=1]
+  tail call void @llvm.dbg.value(metadata !{float %80}, i64 0, metadata !18), !dbg !41
+  br label %bb46, !dbg !41
+
+bb46:                                             ; preds = %bb35, %bb34, %bb33, %bb30, %bb16, %bb8, %bb2
+  %y.1 = phi float [ %80, %bb35 ], [ %y.0, %bb34 ], [ %y.0, %bb33 ], [ %y.0, %bb30 ], [ %55, %bb16 ], [ %29, %bb8 ], [ %y.0, %bb2 ] ; <float> [#uses=2]
+  %x.1 = phi float [ %76, %bb35 ], [ %x.0, %bb34 ], [ %x.0, %bb33 ], [ %x.0, %bb30 ], [ %51, %bb16 ], [ %28, %bb8 ], [ %x.0, %bb2 ] ; <float> [#uses=1]
+  %81 = fmul float %y.1, 0.000000e+00, !dbg !42   ; <float> [#uses=1]
+  %82 = fadd float %y.1, 0.000000e+00, !dbg !42   ; <float> [#uses=1]
+  %tmpr = fadd float %x.1, %81, !dbg !42          ; <float> [#uses=1]
+  %tmp89 = bitcast float %tmpr to i32             ; <i32> [#uses=1]
+  %tmp90 = zext i32 %tmp89 to i64                 ; <i64> [#uses=1]
+  %tmp85 = bitcast float %82 to i32               ; <i32> [#uses=1]
+  %tmp86 = zext i32 %tmp85 to i64                 ; <i64> [#uses=1]
+  %tmp87 = shl i64 %tmp86, 32                     ; <i64> [#uses=1]
+  %ins = or i64 %tmp90, %tmp87                    ; <i64> [#uses=1]
+  %tmp84 = bitcast i64 %ins to double             ; <double> [#uses=1]
+  %mrv75 = insertvalue %0 undef, double %tmp84, 0, !dbg !42 ; <%0> [#uses=1]
+  ret %0 %mrv75, !dbg !42
+}
+
+declare float @fabsf(float)
+
+declare float @copysignf(float, float) nounwind readnone
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+!llvm.dbg.lv = !{!0, !11, !12, !13, !14, !16, !17, !18}
+
+!0 = metadata !{i32 524545, metadata !1, metadata !"a", metadata !2, i32 1921, metadata !9} ; [ DW_TAG_arg_variable ]
+!1 = metadata !{i32 524334, i32 0, metadata !2, metadata !"__divsc3", metadata !"__divsc3", metadata !"__divsc3", metadata !2, i32 1922, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true} ; [ DW_TAG_subprogram ]
+!2 = metadata !{i32 524329, metadata !"libgcc2.c", metadata !"/Users/yash/clean/LG.D/gcc/../../llvmgcc/gcc", metadata !3} ; [ DW_TAG_file_type ]
+!3 = metadata !{i32 524305, i32 0, i32 1, metadata !"libgcc2.c", metadata !"/Users/yash/clean/LG.D/gcc/../../llvmgcc/gcc", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!4 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!5 = metadata !{metadata !6, metadata !9, metadata !9, metadata !9, metadata !9}
+!6 = metadata !{i32 524310, metadata !7, metadata !"SCtype", metadata !7, i32 170, i64 0, i64 0, i64 0, i32 0, metadata !8} ; [ DW_TAG_typedef ]
+!7 = metadata !{i32 524329, metadata !"libgcc2.h", metadata !"/Users/yash/clean/LG.D/gcc/../../llvmgcc/gcc", metadata !3} ; [ DW_TAG_file_type ]
+!8 = metadata !{i32 524324, metadata !2, metadata !"complex float", metadata !2, i32 0, i64 64, i64 32, i64 0, i32 0, i32 3} ; [ DW_TAG_base_type ]
+!9 = metadata !{i32 524310, metadata !7, metadata !"SFtype", metadata !7, i32 167, i64 0, i64 0, i64 0, i32 0, metadata !10} ; [ DW_TAG_typedef ]
+!10 = metadata !{i32 524324, metadata !2, metadata !"float", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
+!11 = metadata !{i32 524545, metadata !1, metadata !"b", metadata !2, i32 1921, metadata !9} ; [ DW_TAG_arg_variable ]
+!12 = metadata !{i32 524545, metadata !1, metadata !"c", metadata !2, i32 1921, metadata !9} ; [ DW_TAG_arg_variable ]
+!13 = metadata !{i32 524545, metadata !1, metadata !"d", metadata !2, i32 1921, metadata !9} ; [ DW_TAG_arg_variable ]
+!14 = metadata !{i32 524544, metadata !15, metadata !"denom", metadata !2, i32 1923, metadata !9} ; [ DW_TAG_auto_variable ]
+!15 = metadata !{i32 524299, metadata !1, i32 1922, i32 0} ; [ DW_TAG_lexical_block ]
+!16 = metadata !{i32 524544, metadata !15, metadata !"ratio", metadata !2, i32 1923, metadata !9} ; [ DW_TAG_auto_variable ]
+!17 = metadata !{i32 524544, metadata !15, metadata !"x", metadata !2, i32 1923, metadata !9} ; [ DW_TAG_auto_variable ]
+!18 = metadata !{i32 524544, metadata !15, metadata !"y", metadata !2, i32 1923, metadata !9} ; [ DW_TAG_auto_variable ]
+!19 = metadata !{i32 1929, i32 0, metadata !15, null}
+!20 = metadata !{i32 1931, i32 0, metadata !15, null}
+!21 = metadata !{i32 1932, i32 0, metadata !15, null}
+!22 = metadata !{i32 1933, i32 0, metadata !15, null}
+!23 = metadata !{i32 1934, i32 0, metadata !15, null}
+!24 = metadata !{i32 1938, i32 0, metadata !15, null}
+!25 = metadata !{i32 1939, i32 0, metadata !15, null}
+!26 = metadata !{i32 1940, i32 0, metadata !15, null}
+!27 = metadata !{i32 1941, i32 0, metadata !15, null}
+!28 = metadata !{i32 1946, i32 0, metadata !15, null}
+!29 = metadata !{i32 1948, i32 0, metadata !15, null}
+!30 = metadata !{i32 1950, i32 0, metadata !15, null}
+!31 = metadata !{i32 1951, i32 0, metadata !15, null}
+!32 = metadata !{i32 1953, i32 0, metadata !15, null}
+!33 = metadata !{i32 1955, i32 0, metadata !15, null}
+!34 = metadata !{i32 1956, i32 0, metadata !15, null}
+!35 = metadata !{i32 1957, i32 0, metadata !15, null}
+!36 = metadata !{i32 1958, i32 0, metadata !15, null}
+!37 = metadata !{i32 1960, i32 0, metadata !15, null}
+!38 = metadata !{i32 1962, i32 0, metadata !15, null}
+!39 = metadata !{i32 1963, i32 0, metadata !15, null}
+!40 = metadata !{i32 1964, i32 0, metadata !15, null}
+!41 = metadata !{i32 1965, i32 0, metadata !15, null}
+!42 = metadata !{i32 1969, i32 0, metadata !15, null}
diff --git a/test/Feature/metadata.ll b/test/Feature/metadata.ll
new file mode 100644
index 0000000..3e2cd3c
--- /dev/null
+++ b/test/Feature/metadata.ll
@@ -0,0 +1,13 @@
+; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis
+; PR7105
+
+define void @foo() {
+  call void @llvm.zonk(metadata !1, i64 0, metadata !1)
+  ret void
+}
+
+declare void @llvm.zonk(metadata, i64, metadata) nounwind readnone
+
+!named = !{!0}
+!0 = metadata !{i8** null}
+!1 = metadata !{i8* null}
diff --git a/test/FrontendC++/2010-05-10-Var-DbgInfo.cpp b/test/FrontendC++/2010-05-10-Var-DbgInfo.cpp
new file mode 100644
index 0000000..e6165c0
--- /dev/null
+++ b/test/FrontendC++/2010-05-10-Var-DbgInfo.cpp
@@ -0,0 +1,43 @@
+// RUN: %llvmgcc -S -O0 -g %s -o /dev/null
+// PR 7104
+
+struct A {
+  int Ai;
+}; 
+
+struct B : public A {}; 
+struct C : public B {}; 
+
+const char * f(int C::*){ return ""; } 
+int f(int B::*) { return 1; } 
+
+struct D : public C {}; 
+
+const char * g(int B::*){ return ""; } 
+int g(int D::*) { return 1; } 
+
+void test() 
+{
+  int i = f(&A::Ai);
+
+  const char * str = g(&A::Ai);
+}
+
+// conversion of B::* to C::* is better than conversion of A::* to C::*
+typedef void (A::*pmfa)();
+typedef void (B::*pmfb)();
+typedef void (C::*pmfc)();
+
+struct X {
+  operator pmfa();
+  operator pmfb();
+};
+
+
+void g(pmfc);
+
+void test2(X x) 
+{
+  g(x);
+}
+
diff --git a/test/FrontendC++/2010-05-11-alwaysinlineinstantiation.cpp b/test/FrontendC++/2010-05-11-alwaysinlineinstantiation.cpp
new file mode 100644
index 0000000..203b542
--- /dev/null
+++ b/test/FrontendC++/2010-05-11-alwaysinlineinstantiation.cpp
@@ -0,0 +1,31 @@
+// RUN: %llvmgxx -xc++ %s -c -o - | llvm-dis | not grep ZN12basic_stringIcEC1Ev
+// RUN: %llvmgxx -xc++ %s -c -o - | llvm-dis | grep ZN12basic_stringIcED1Ev | count 2
+
+template<class charT> 
+class basic_string
+{
+public:
+	basic_string();
+	~basic_string();
+};
+
+template <class charT>
+__attribute__ ((__visibility__("hidden"), __always_inline__)) inline
+basic_string<charT>::basic_string()
+{
+}
+
+template <class charT>
+inline
+basic_string<charT>::~basic_string()
+{
+}
+
+typedef basic_string<char> string;
+
+extern template class basic_string<char>;
+
+int main()
+{
+	string s;
+}
diff --git a/test/FrontendC++/2010-05-12-PtrToMember-Dbg.cpp b/test/FrontendC++/2010-05-12-PtrToMember-Dbg.cpp
new file mode 100644
index 0000000..66acfbe
--- /dev/null
+++ b/test/FrontendC++/2010-05-12-PtrToMember-Dbg.cpp
@@ -0,0 +1,17 @@
+//RUN: %llvmgxx -O0 -emit-llvm -S -g -o - %s | grep DW_TAG_auto_variable
+class Foo
+{
+ public:
+  int x;
+  int y;
+  Foo (int i, int j) { x = i; y = j; }
+};
+
+
+Foo foo(10, 11);
+
+int main() {
+  int Foo::* pmi = &Foo::y;
+  return foo.*pmi;
+}
+
diff --git a/test/FrontendC++/thunk-weak-odr.cpp b/test/FrontendC++/thunk-weak-odr.cpp
new file mode 100644
index 0000000..1d9d4dd
--- /dev/null
+++ b/test/FrontendC++/thunk-weak-odr.cpp
@@ -0,0 +1,33 @@
+// RUN: %llvmgxx %s -S -o - | FileCheck %s
+// <rdar://problem/7929157>
+
+struct A {
+  virtual int f() { return 1; }
+};
+
+struct B {
+  virtual int f() { return 2; }
+};
+
+struct C : A, B {
+  virtual int f() { return 3; }
+};
+
+struct D : C {
+  virtual int f() { return 4; }
+};
+
+static int f(D* d) {
+  B* b = d;
+  return b->f();
+};
+
+int g() {
+  D d;
+  return f(&d);
+}
+
+// Thunks should be marked as "weak ODR", not just "weak".
+//
+// CHECK: define weak_odr i32 @_ZThn{{[48]}}_N1C1fEv
+// CHECK: define weak_odr i32 @_ZThn{{[48]}}_N1D1fEv
diff --git a/test/FrontendC/2007-04-11-InlineStorageClassC89.c b/test/FrontendC/2007-04-11-InlineStorageClassC89.c
index ab1f556..ec2b1ec 100644
--- a/test/FrontendC/2007-04-11-InlineStorageClassC89.c
+++ b/test/FrontendC/2007-04-11-InlineStorageClassC89.c
@@ -11,8 +11,8 @@
 // RUN:   grep -v linkonce | count 1
 // RUN: %llvmgcc %s -S -emit-llvm -O0 -o - | grep define | \
 // RUN:   grep xstatnoWeak | grep internal | count 1
-// RUN: %llvmgcc %s -S -emit-llvm -O0 -o - | grep declare | \
-// RUN:   grep xextnoWeak | grep -v internal | grep -v weak | \
+// RUN: %llvmgcc %s -S -emit-llvm -O0 -o - | grep define | \
+// RUN:   grep xextnoWeak | grep available_externally | grep -v weak | \
 // RUN:   grep -v linkonce | count 1
 inline int xglobWeak(int) __attribute__((weak));
 inline int xglobWeak (int i) {
diff --git a/test/FrontendC/2007-04-11-InlineStorageClassC99.c b/test/FrontendC/2007-04-11-InlineStorageClassC99.c
index f6193aa..89af278 100644
--- a/test/FrontendC/2007-04-11-InlineStorageClassC99.c
+++ b/test/FrontendC/2007-04-11-InlineStorageClassC99.c
@@ -6,13 +6,13 @@
 // RUN:   grep xWeaknoinline | grep weak | count 1
 // RUN: %llvmgcc -std=c99 %s -S -emit-llvm -O0 -o - | grep define | \
 // RUN:   grep xWeakextnoinline | grep weak | count 1
-// RUN: %llvmgcc -std=c99 %s -S -emit-llvm -O0 -o - | grep declare | \
-// RUN:   grep xglobnoWeak | grep -v internal | grep -v weak | \
+// RUN: %llvmgcc -std=c99 %s -S -emit-llvm -O0 -o - | grep define | \
+// RUN:   grep xglobnoWeak | grep available_externally | grep -v weak | \
 // RUN:   grep -v linkonce | count 1
 // RUN: %llvmgcc -std=c99 %s -S -emit-llvm -O0 -o - | grep define | \
 // RUN:   grep xstatnoWeak | grep internal | count 1
 // RUN: %llvmgcc -std=c99 %s -S -emit-llvm -O0 -o - | grep define | \
-// RUN:   grep xextnoWeak | grep -v internal | grep -v weak | \
+// RUN:   grep xextnoWeak | grep -v available_externally | grep -v weak | \
 // RUN:   grep -v linkonce | count 1
 inline int xglobWeak(int) __attribute__((weak));
 inline int xglobWeak (int i) {
diff --git a/test/FrontendC/2010-05-14-Optimized-VarType.c b/test/FrontendC/2010-05-14-Optimized-VarType.c
new file mode 100644
index 0000000..2aa85b5
--- /dev/null
+++ b/test/FrontendC/2010-05-14-Optimized-VarType.c
@@ -0,0 +1,23 @@
+// RUN: %llvmgcc %s -Os -S -g -o - | grep DW_TAG_structure_type | count 1
+// Variable 'a' is optimized but the debug info should preserve its type info.
+#include <stdlib.h>
+
+struct foo {
+	int Attribute;
+};
+
+void *getfoo(void) __attribute__((noinline));
+
+void *getfoo(void)
+{
+	int *x = malloc(sizeof(int));
+	*x = 42;
+	return (void *)x;
+}
+
+int main(int argc, char *argv[]) {
+	struct foo *a = (struct foo *)getfoo();
+
+	return a->Attribute;
+}
+
diff --git a/test/FrontendC/2010-05-18-asmsched.c b/test/FrontendC/2010-05-18-asmsched.c
new file mode 100644
index 0000000..34abbe3
--- /dev/null
+++ b/test/FrontendC/2010-05-18-asmsched.c
@@ -0,0 +1,17 @@
+// RUN: %llvmgcc %s -c -O3 -m64 -emit-llvm -o - | llc -march=x86-64 -mtriple=x86_64-apple-darwin | FileCheck %s
+// r9 used to be clobbered before its value was moved to r10.  7993104.
+
+void foo(int x, int y) {
+// CHECK: bar
+// CHECK: movq  %r9, %r10
+// CHECK: movq  %rdi, %r9
+// CHECK: bar
+  register int lr9 asm("r9") = x;
+  register int lr10 asm("r10") = y;
+  int foo;
+  asm volatile("bar" : "=r"(lr9) : "r"(lr9), "r"(lr10));
+  foo = lr9;
+  lr9 = x;
+  lr10 = foo;
+  asm volatile("bar" : "=r"(lr9) : "r"(lr9), "r"(lr10));
+}
+\ No newline at end of file
diff --git a/test/FrontendC/2010-05-18-palignr.c b/test/FrontendC/2010-05-18-palignr.c
new file mode 100644
index 0000000..0b78eed
--- /dev/null
+++ b/test/FrontendC/2010-05-18-palignr.c
@@ -0,0 +1,24 @@
+// RUN: %llvmgcc -mssse3 -S -o - %s | llc -mtriple=x86_64-apple-darwin | FileCheck %s
+// XFAIL: *
+// XTARGET: x86,i386,i686
+
+#include <tmmintrin.h>
+
+int main ()
+{
+#if defined( __SSSE3__ )
+
+#define vec_rld_epi16( _a, _i )  ({ vSInt16 _t = _a; _t = _mm_alignr_epi8( _t, _t, _i ); /*return*/ _t; })
+  typedef int16_t     vSInt16         __attribute__ ((__vector_size__ (16)));
+
+  short   dtbl[] = {1,2,3,4,5,6,7,8};
+  vSInt16 *vdtbl = (vSInt16*) dtbl;
+
+  vSInt16 v0;
+  v0 = *vdtbl;
+  // CHECK: pshufd  $57
+  v0 = vec_rld_epi16( v0, 4 );
+
+  return 0;
+#endif
+}
diff --git a/test/FrontendC/2010-05-26-AsmSideEffect.c b/test/FrontendC/2010-05-26-AsmSideEffect.c
new file mode 100644
index 0000000..c5f7579
--- /dev/null
+++ b/test/FrontendC/2010-05-26-AsmSideEffect.c
@@ -0,0 +1,12 @@
+// RUN: %llvmgcc %s -S -emit-llvm -o - | FileCheck %s
+// Radar 8026855
+
+int test (void *src) {
+  register int w0 asm ("0");
+  // CHECK: call i32 asm sideeffect
+  asm ("ldr %0, [%1]": "=r" (w0): "r" (src));
+  // The asm to read the value of w0 has a sideeffect for a different reason
+  // (see 2010-05-18-asmsched.c) but that's not what this is testing for.
+  // CHECK: call i32 asm
+  return w0;
+}
diff --git a/test/FrontendC/pr2394.c b/test/FrontendC/pr2394.c
new file mode 100644
index 0000000..ca8b046
--- /dev/null
+++ b/test/FrontendC/pr2394.c
@@ -0,0 +1,6 @@
+// RUN: %llvmgcc %s -S -o - | FileCheck %s
+struct __attribute((packed)) x {int a : 24;};
+int a(struct x* g) {
+  // CHECK: load i24
+  return g->a;
+}
diff --git a/test/MC/AsmParser/X86/x86_32-bit_cat.s b/test/MC/AsmParser/X86/x86_32-bit_cat.s
index ec2bfa4..9002c60 100644
--- a/test/MC/AsmParser/X86/x86_32-bit_cat.s
+++ b/test/MC/AsmParser/X86/x86_32-bit_cat.s
@@ -163,6 +163,18 @@
 // CHECK: 	popw	32493
         	popw	0x7eed
 
+// CHECK: 	pushf
+        	pushfl
+
+// CHECK: 	pushfl
+        	pushfl
+
+// CHECK: 	popf
+        	popfl
+
+// CHECK: 	popfl
+        	popfl
+
 // CHECK: 	clc
         	clc
 
@@ -2482,6 +2494,12 @@
 // CHECK: 	clflush	305419896
         	clflush	0x12345678
 
+// CHECK: 	pause
+        	pause
+
+// CHECK: 	sfence
+        	sfence
+
 // CHECK: 	lfence
         	lfence
 
diff --git a/test/MC/AsmParser/X86/x86_32-encoding.s b/test/MC/AsmParser/X86/x86_32-encoding.s
index f973cb23..994dbb8 100644
--- a/test/MC/AsmParser/X86/x86_32-encoding.s
+++ b/test/MC/AsmParser/X86/x86_32-encoding.s
@@ -9962,11 +9962,16 @@
 // CHECK:  encoding: [0x66,0x0f,0x3a,0xdf,0x14,0x82,0x7d]
                 aeskeygenassist $125, (%edx,%eax,4), %xmm2
 
+// rdar://8017638
+// CHECK: aeskeygenassist	$128, %xmm1, %xmm2
+// CHECK:  encoding: [0x66,0x0f,0x3a,0xdf,0x14,0x82,0x80]
+		aeskeygenassist $128, %xmm1, %xmm2
+
 // rdar://7840289
 // CHECK: pshufb	CPI1_0(%rip), %xmm1
 // CHECK:  encoding: [0x66,0x0f,0x38,0x00,0x0d,A,A,A,A]
 // CHECK:  fixup A - offset: 5, value: CPI1_0-4
-pshufb	CPI1_0(%rip), %xmm1   
+pshufb	CPI1_0(%rip), %xmm1
 
 // rdar://7910087
 // CHECK: bsfw	%bx, %bx
@@ -10018,7 +10023,7 @@ pshufb	CPI1_0(%rip), %xmm1
 // radr://7914715
 // CHECK: fcoml   3735928559(%ebx,%ecx,8)
 // CHECK:  encoding: [0xdc,0x94,0xcb,0xef,0xbe,0xad,0xde]
-          fcoml   3735928559(%ebx,%ecx,8) 
+          fcoml   3735928559(%ebx,%ecx,8)
 
 // CHECK: fcoms   32493
 // CHECK:  encoding: [0xd8,0x15,0xed,0x7e,0x00,0x00]
diff --git a/test/MC/AsmParser/X86/x86_32-new-encoder.s b/test/MC/AsmParser/X86/x86_32-new-encoder.s
index c00007f..0b7d327 100644
--- a/test/MC/AsmParser/X86/x86_32-new-encoder.s
+++ b/test/MC/AsmParser/X86/x86_32-new-encoder.s
@@ -1,5 +1,11 @@
 // RUN: llvm-mc -triple i386-unknown-unknown --show-encoding %s | FileCheck %s
 
+	pause
+// CHECK: pause
+// CHECK: encoding: [0xf3,0x90]
+	sfence
+// CHECK: sfence
+// CHECK: encoding: [0x0f,0xae,0xf8]
 	lfence
 // CHECK: lfence
 // CHECK: encoding: [0x0f,0xae,0xe8]
@@ -50,3 +56,278 @@ rdtscp
 
 // CHECK: shrl	%eax                    # encoding: [0xd1,0xe8]
         shrl $1, %eax
+
+// moffset forms of moves, rdar://7947184
+movb	0, %al    // CHECK: movb 0, %al  # encoding: [0xa0,A,A,A,A]
+movw	0, %ax    // CHECK: movw 0, %ax  # encoding: [0x66,0xa1,A,A,A,A]
+movl	0, %eax   // CHECK: movl 0, %eax  # encoding: [0xa1,A,A,A,A]
+
+// rdar://7973775
+into
+// CHECK: into
+// CHECK:  encoding: [0xce]
+int3
+// CHECK: int3
+// CHECK:  encoding: [0xcc]
+int $4
+// CHECK: int $4
+// CHECK:  encoding: [0xcd,0x04]
+int $255
+// CHECK: int $255
+// CHECK:  encoding: [0xcd,0xff]
+
+// CHECK: pushfl	# encoding: [0x9c]
+        pushf
+// CHECK: pushfl	# encoding: [0x9c]
+        pushfl
+// CHECK: popfl	        # encoding: [0x9d]
+        popf
+// CHECK: popfl	        # encoding: [0x9d]
+        popfl
+
+// rdar://8014869
+retl
+// CHECK: ret
+// CHECK:  encoding: [0xc3]
+
+// rdar://7973854
+// CHECK: cmoval	%eax, %edx
+// CHECK:  encoding: [0x0f,0x47,0xd0]
+        	cmoval	%eax,%edx
+
+// CHECK: cmovael	%eax, %edx
+// CHECK:  encoding: [0x0f,0x43,0xd0]
+        	cmovael	%eax,%edx
+
+// CHECK: cmovbel	%eax, %edx
+// CHECK:  encoding: [0x0f,0x46,0xd0]
+        	cmovbel	%eax,%edx
+
+// CHECK: cmovbl	%eax, %edx
+// CHECK:  encoding: [0x0f,0x42,0xd0]
+        	cmovbl	%eax,%edx
+
+// CHECK: cmovbel	%eax, %edx
+// CHECK:  encoding: [0x0f,0x46,0xd0]
+        	cmovbel	%eax,%edx
+
+// CHECK: cmovbl	%eax, %edx
+// CHECK:  encoding: [0x0f,0x42,0xd0]
+        	cmovcl	%eax,%edx
+
+// CHECK: cmovel	%eax, %edx
+// CHECK:  encoding: [0x0f,0x44,0xd0]
+        	cmovel	%eax,%edx
+
+// CHECK: cmovgl	%eax, %edx
+// CHECK:  encoding: [0x0f,0x4f,0xd0]
+        	cmovgl	%eax,%edx
+
+// CHECK: cmovgel	%eax, %edx
+// CHECK:  encoding: [0x0f,0x4d,0xd0]
+        	cmovgel	%eax,%edx
+
+// CHECK: cmovll	%eax, %edx
+// CHECK:  encoding: [0x0f,0x4c,0xd0]
+        	cmovll	%eax,%edx
+
+// CHECK: cmovlel	%eax, %edx
+// CHECK:  encoding: [0x0f,0x4e,0xd0]
+        	cmovlel	%eax,%edx
+
+// CHECK: cmovbel	%eax, %edx
+// CHECK:  encoding: [0x0f,0x46,0xd0]
+        	cmovnal	%eax,%edx
+
+// CHECK: cmovnel	%eax, %edx
+// CHECK:  encoding: [0x0f,0x45,0xd0]
+        	cmovnel	%eax,%edx
+
+// CHECK: cmovael	%eax, %edx
+// CHECK:  encoding: [0x0f,0x43,0xd0]
+        	cmovnbl	%eax,%edx
+
+// CHECK: cmoval	%eax, %edx
+// CHECK:  encoding: [0x0f,0x47,0xd0]
+        	cmovnbel	%eax,%edx
+
+// CHECK: cmovael	%eax, %edx
+// CHECK:  encoding: [0x0f,0x43,0xd0]
+        	cmovncl	%eax,%edx
+
+// CHECK: cmovnel	%eax, %edx
+// CHECK:  encoding: [0x0f,0x45,0xd0]
+        	cmovnel	%eax,%edx
+
+// CHECK: cmovlel	%eax, %edx
+// CHECK:  encoding: [0x0f,0x4e,0xd0]
+        	cmovngl	%eax,%edx
+
+// CHECK: cmovgel	%eax, %edx
+// CHECK:  encoding: [0x0f,0x4d,0xd0]
+        	cmovnl	%eax,%edx
+
+// CHECK: cmovnel	%eax, %edx
+// CHECK:  encoding: [0x0f,0x45,0xd0]
+        	cmovnel	%eax,%edx
+
+// CHECK: cmovlel	%eax, %edx
+// CHECK:  encoding: [0x0f,0x4e,0xd0]
+        	cmovngl	%eax,%edx
+
+// CHECK: cmovll	%eax, %edx
+// CHECK:  encoding: [0x0f,0x4c,0xd0]
+        	cmovngel	%eax,%edx
+
+// CHECK: cmovgel	%eax, %edx
+// CHECK:  encoding: [0x0f,0x4d,0xd0]
+        	cmovnll	%eax,%edx
+
+// CHECK: cmovgl	%eax, %edx
+// CHECK:  encoding: [0x0f,0x4f,0xd0]
+        	cmovnlel	%eax,%edx
+
+// CHECK: cmovnol	%eax, %edx
+// CHECK:  encoding: [0x0f,0x41,0xd0]
+        	cmovnol	%eax,%edx
+
+// CHECK: cmovnpl	%eax, %edx
+// CHECK:  encoding: [0x0f,0x4b,0xd0]
+        	cmovnpl	%eax,%edx
+
+// CHECK: cmovnsl	%eax, %edx
+// CHECK:  encoding: [0x0f,0x49,0xd0]
+        	cmovnsl	%eax,%edx
+
+// CHECK: cmovnel	%eax, %edx
+// CHECK:  encoding: [0x0f,0x45,0xd0]
+        	cmovnzl	%eax,%edx
+
+// CHECK: cmovol	%eax, %edx
+// CHECK:  encoding: [0x0f,0x40,0xd0]
+        	cmovol	%eax,%edx
+
+// CHECK: cmovpl	%eax, %edx
+// CHECK:  encoding: [0x0f,0x4a,0xd0]
+        	cmovpl	%eax,%edx
+
+// CHECK: cmovsl	%eax, %edx
+// CHECK:  encoding: [0x0f,0x48,0xd0]
+        	cmovsl	%eax,%edx
+
+// CHECK: cmovel	%eax, %edx
+// CHECK:  encoding: [0x0f,0x44,0xd0]
+        	cmovzl	%eax,%edx
+
+// CHECK: cmpps	$0, %xmm0, %xmm1
+// CHECK: encoding: [0x0f,0xc2,0xc8,0x00]
+        cmpps $0, %xmm0, %xmm1
+// CHECK:	cmpps	$0, (%eax), %xmm1
+// CHECK: encoding: [0x0f,0xc2,0x08,0x00]
+        cmpps $0, 0(%eax), %xmm1
+// CHECK:	cmppd	$0, %xmm0, %xmm1
+// CHECK: encoding: [0x66,0x0f,0xc2,0xc8,0x00]
+        cmppd $0, %xmm0, %xmm1
+// CHECK:	cmppd	$0, (%eax), %xmm1
+// CHECK: encoding: [0x66,0x0f,0xc2,0x08,0x00]
+        cmppd $0, 0(%eax), %xmm1
+// CHECK:	cmpss	$0, %xmm0, %xmm1
+// CHECK: encoding: [0xf3,0x0f,0xc2,0xc8,0x00]
+        cmpss $0, %xmm0, %xmm1
+// CHECK:	cmpss	$0, (%eax), %xmm1
+// CHECK: encoding: [0xf3,0x0f,0xc2,0x08,0x00]
+        cmpss $0, 0(%eax), %xmm1
+// CHECK:	cmpsd	$0, %xmm0, %xmm1
+// CHECK: encoding: [0xf2,0x0f,0xc2,0xc8,0x00]
+        cmpsd $0, %xmm0, %xmm1
+// CHECK:	cmpsd	$0, (%eax), %xmm1
+// CHECK: encoding: [0xf2,0x0f,0xc2,0x08,0x00]
+        cmpsd $0, 0(%eax), %xmm1
+
+// Check matching of instructions which embed the SSE comparison code.
+
+// CHECK: cmpps $0, %xmm0, %xmm1
+// CHECK: encoding: [0x0f,0xc2,0xc8,0x00]
+        cmpeqps %xmm0, %xmm1
+
+// CHECK: cmppd $1, %xmm0, %xmm1
+// CHECK: encoding: [0x66,0x0f,0xc2,0xc8,0x01]
+        cmpltpd %xmm0, %xmm1
+
+// CHECK: cmpss $2, %xmm0, %xmm1
+// CHECK: encoding: [0xf3,0x0f,0xc2,0xc8,0x02]
+        cmpless %xmm0, %xmm1
+
+// CHECK: cmppd $3, %xmm0, %xmm1
+// CHECK: encoding: [0x66,0x0f,0xc2,0xc8,0x03]
+        cmpunordpd %xmm0, %xmm1
+
+// CHECK: cmpps $4, %xmm0, %xmm1
+// CHECK: encoding: [0x0f,0xc2,0xc8,0x04]
+        cmpneqps %xmm0, %xmm1
+
+// CHECK: cmppd $5, %xmm0, %xmm1
+// CHECK: encoding: [0x66,0x0f,0xc2,0xc8,0x05]
+        cmpnltpd %xmm0, %xmm1
+
+// CHECK: cmpss $6, %xmm0, %xmm1
+// CHECK: encoding: [0xf3,0x0f,0xc2,0xc8,0x06]
+        cmpnless %xmm0, %xmm1
+
+// CHECK: cmpsd $7, %xmm0, %xmm1
+// CHECK: encoding: [0xf2,0x0f,0xc2,0xc8,0x07]
+        cmpordsd %xmm0, %xmm1
+
+// rdar://7995856
+// CHECK: fmul	%st(0)
+// CHECK:  encoding: [0xd8,0xc8]
+        fmul %st(0), %st
+
+// CHECK: fadd	%st(0)
+// CHECK:  encoding: [0xd8,0xc0]
+        fadd %st(0), %st
+
+// CHECK: fsub	%st(0)
+// CHECK:  encoding: [0xd8,0xe0]
+        fsub %st(0), %st
+
+// CHECK: fsubr	%st(0)
+// CHECK:  encoding: [0xd8,0xe8]
+        fsubr %st(0), %st
+
+// CHECK: fdivr	%st(0)
+// CHECK:  encoding: [0xd8,0xf8]
+        fdivr %st(0), %st
+
+// CHECK: fdiv	%st(0)
+// CHECK:  encoding: [0xd8,0xf0]
+        fdiv %st(0), %st
+
+// radr://8017519
+// CHECK: movl	%cs, %eax
+// CHECK:  encoding: [0x8c,0xc8]
+        movl %cs, %eax
+
+// CHECK: movw	%cs, %ax
+// CHECK:  encoding: [0x66,0x8c,0xc8]
+        movw %cs, %ax
+
+// CHECK: movl	%cs, (%eax)
+// CHECK:  encoding: [0x8c,0x08]
+        movl %cs, (%eax)
+
+// CHECK: movw	%cs, (%eax)
+// CHECK:  encoding: [0x66,0x8c,0x08]
+        movw %cs, (%eax)
+
+// CHECK: movl	%eax, %cs
+// CHECK:  encoding: [0x8e,0xc8]
+        movl %eax, %cs
+
+// CHECK: movl	(%eax), %cs
+// CHECK:  encoding: [0x8e,0x08]
+        movl (%eax), %cs
+
+// CHECK: movw	(%eax), %cs
+// CHECK:  encoding: [0x66,0x8e,0x08]
+        movw (%eax), %cs
diff --git a/test/MC/AsmParser/X86/x86_64-encoding.s b/test/MC/AsmParser/X86/x86_64-encoding.s
index 3920c5b..036f7d4 100644
--- a/test/MC/AsmParser/X86/x86_64-encoding.s
+++ b/test/MC/AsmParser/X86/x86_64-encoding.s
@@ -71,3 +71,35 @@
 // CHECK: crc32q 	4(%rbx), %rax
 // CHECK:  encoding: [0xf2,0x48,0x0f,0x38,0xf1,0x43,0x04]
         crc32q	4(%rbx), %rax
+
+// CHECK: movd %r8, %mm1
+// CHECK:  encoding: [0x49,0x0f,0x6e,0xc8]
+movd %r8, %mm1
+
+// CHECK: movd %r8d, %mm1
+// CHECK:  encoding: [0x41,0x0f,0x6e,0xc8]
+movd %r8d, %mm1
+
+// CHECK: movd %rdx, %mm1
+// CHECK:  encoding: [0x48,0x0f,0x6e,0xca]
+movd %rdx, %mm1
+
+// CHECK: movd %edx, %mm1
+// CHECK:  encoding: [0x0f,0x6e,0xca]
+movd %edx, %mm1
+
+// CHECK: movd %mm1, %r8
+// CHECK:  encoding: [0x49,0x0f,0x7e,0xc8]
+movd %mm1, %r8
+
+// CHECK: movd %mm1, %r8d
+// CHECK:  encoding: [0x41,0x0f,0x7e,0xc8]
+movd %mm1, %r8d
+
+// CHECK: movd %mm1, %rdx
+// CHECK:  encoding: [0x48,0x0f,0x7e,0xca]
+movd %mm1, %rdx
+
+// CHECK: movd %mm1, %edx
+// CHECK:  encoding: [0x0f,0x7e,0xca]
+movd %mm1, %edx
diff --git a/test/MC/AsmParser/X86/x86_64-imm-widths.s b/test/MC/AsmParser/X86/x86_64-imm-widths.s
new file mode 100644
index 0000000..6243717
--- /dev/null
+++ b/test/MC/AsmParser/X86/x86_64-imm-widths.s
@@ -0,0 +1,105 @@
+// RUN: llvm-mc -triple x86_64- --show-encoding %s | FileCheck %s
+
+// CHECK: addb $0, %al
+// CHECK: encoding: [0x04,0x00]
+ 	addb $0x00, %al
+
+// CHECK: addb $127, %al
+// CHECK: encoding: [0x04,0x7f]
+ 	addb $0x7F, %al
+
+// CHECK: addb $128, %al
+// CHECK: encoding: [0x04,0x80]
+ 	addb $0x80, %al
+
+// CHECK: addb $255, %al
+// CHECK: encoding: [0x04,0xff]
+ 	addb $0xFF, %al
+
+// CHECK: addw $0, %ax
+// CHECK: encoding: [0x66,0x83,0xc0,0x00]
+ 	addw $0x0000, %ax
+
+// CHECK: addw $127, %ax
+// CHECK: encoding: [0x66,0x83,0xc0,0x7f]
+ 	addw $0x007F, %ax
+
+// CHECK: addw $65408, %ax
+// CHECK: encoding: [0x66,0x83,0xc0,0x80]
+ 	addw $0xFF80, %ax
+
+// CHECK: addw $65535, %ax
+// CHECK: encoding: [0x66,0x83,0xc0,0xff]
+	addw $0xFFFF, %ax
+
+// CHECK: addl $0, %eax
+// CHECK: encoding: [0x83,0xc0,0x00]
+ 	addl $0x00000000, %eax
+
+// CHECK: addl $127, %eax
+// CHECK: encoding: [0x83,0xc0,0x7f]
+ 	addl $0x0000007F, %eax
+
+// CHECK: addl $65408, %eax
+// CHECK: encoding: [0x05,0x80,0xff,0x00,0x00]
+ 	addl $0xFF80, %eax
+
+// CHECK: addl $65535, %eax
+// CHECK: encoding: [0x05,0xff,0xff,0x00,0x00]
+	addl $0xFFFF, %eax
+
+// CHECK: addl $4294967168, %eax
+// CHECK: encoding: [0x83,0xc0,0x80]
+ 	addl $0xFFFFFF80, %eax
+
+// CHECK: addl $4294967295, %eax
+// CHECK: encoding: [0x83,0xc0,0xff]
+ 	addl $0xFFFFFFFF, %eax
+
+// CHECK: addq $0, %rax
+// CHECK: encoding: [0x48,0x83,0xc0,0x00]
+ 	addq $0x0000000000000000, %rax
+
+// CHECK: addq $127, %rax
+// CHECK: encoding: [0x48,0x83,0xc0,0x7f]
+ 	addq $0x000000000000007F, %rax
+
+// CHECK: addq $-128, %rax
+// CHECK: encoding: [0x48,0x83,0xc0,0x80]
+ 	addq $0xFFFFFFFFFFFFFF80, %rax
+
+// CHECK: addq $-1, %rax
+// CHECK: encoding: [0x48,0x83,0xc0,0xff]
+ 	addq $0xFFFFFFFFFFFFFFFF, %rax
+
+// CHECK: addq $0, %rax
+// CHECK: encoding: [0x48,0x83,0xc0,0x00]
+ 	addq $0x0000000000000000, %rax
+
+// CHECK: addq $65408, %rax
+// CHECK: encoding: [0x48,0x05,0x80,0xff,0x00,0x00]
+ 	addq $0xFF80, %rax
+
+// CHECK: addq $65535, %rax
+// CHECK: encoding: [0x48,0x05,0xff,0xff,0x00,0x00]
+	addq $0xFFFF, %rax
+
+// CHECK: movq $4294967168, %rax
+// CHECK: encoding: [0x48,0xb8,0x80,0xff,0xff,0xff,0x00,0x00,0x00,0x00]
+ 	movq $0xFFFFFF80, %rax
+
+// CHECK: movq $4294967295, %rax
+// CHECK: encoding: [0x48,0xb8,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00]
+        movq $0xFFFFFFFF, %rax
+
+// CHECK: addq $2147483647, %rax
+// CHECK: encoding: [0x48,0x05,0xff,0xff,0xff,0x7f]
+ 	addq $0x000000007FFFFFFF, %rax
+
+// CHECK: addq $-2147483648, %rax
+// CHECK: encoding: [0x48,0x05,0x00,0x00,0x00,0x80]
+	addq $0xFFFFFFFF80000000, %rax
+
+// CHECK: addq $-256, %rax
+// CHECK: encoding: [0x48,0x05,0x00,0xff,0xff,0xff]
+ 	addq $0xFFFFFFFFFFFFFF00, %rax
diff --git a/test/MC/AsmParser/X86/x86_64-new-encoder.s b/test/MC/AsmParser/X86/x86_64-new-encoder.s
index 4028bee..6988110 100644
--- a/test/MC/AsmParser/X86/x86_64-new-encoder.s
+++ b/test/MC/AsmParser/X86/x86_64-new-encoder.s
@@ -52,3 +52,82 @@ testq %rax, %rbx
 // CHECK: cmpq	%rbx, %r14
 // CHECK:   encoding: [0x49,0x39,0xde]
         cmpq %rbx, %r14
+
+// rdar://7947167
+
+movsq
+// CHECK: movsq
+// CHECK:   encoding: [0x48,0xa5]
+
+movsl
+// CHECK: movsl
+// CHECK:   encoding: [0xa5]
+
+stosq
+// CHECK: stosq
+// CHECK:   encoding: [0x48,0xab]
+stosl
+// CHECK: stosl
+// CHECK:   encoding: [0xab]
+
+
+// Not moffset forms of moves, they are x86-32 only! rdar://7947184
+movb	0, %al    // CHECK: movb 0, %al # encoding: [0x8a,0x04,0x25,A,A,A,A]
+movw	0, %ax    // CHECK: movw 0, %ax # encoding: [0x66,0x8b,0x04,0x25,A,A,A,A]
+movl	0, %eax   // CHECK: movl 0, %eax # encoding: [0x8b,0x04,0x25,A,A,A,A]
+
+// CHECK: pushfq	# encoding: [0x9c]
+        pushf
+// CHECK: pushfq	# encoding: [0x9c]
+        pushfq
+// CHECK: popfq	        # encoding: [0x9d]
+        popf
+// CHECK: popfq	        # encoding: [0x9d]
+        popfq
+
+// CHECK: movabsq $-281474976710654, %rax
+// CHECK: encoding: [0x48,0xb8,0x02,0x00,0x00,0x00,0x00,0x00,0xff,0xff]
+        movabsq $0xFFFF000000000002, %rax
+
+// CHECK: movq $-281474976710654, %rax
+// CHECK: encoding: [0x48,0xb8,0x02,0x00,0x00,0x00,0x00,0x00,0xff,0xff]
+        movq $0xFFFF000000000002, %rax
+
+// CHECK: movq $-65536, %rax
+// CHECK: encoding: [0x48,0xc7,0xc0,0x00,0x00,0xff,0xff]
+        movq $0xFFFFFFFFFFFF0000, %rax
+
+// CHECK: movq $-256, %rax
+// CHECK: encoding: [0x48,0xc7,0xc0,0x00,0xff,0xff,0xff]
+        movq $0xFFFFFFFFFFFFFF00, %rax
+
+// CHECK: movq $10, %rax
+// CHECK: encoding: [0x48,0xc7,0xc0,0x0a,0x00,0x00,0x00]
+        movq $10, %rax
+
+// rdar://8014869
+//
+// CHECK: ret
+// CHECK:  encoding: [0xc3]
+        retq
+
+// CHECK: sete %al
+// CHECK: encoding: [0x0f,0x94,0xc0]
+        setz %al
+
+// CHECK: setne %al
+// CHECK: encoding: [0x0f,0x95,0xc0]
+        setnz %al
+
+// CHECK: je 0
+// CHECK: encoding: [0x74,A]
+        jz 0
+
+// CHECK: jne
+// CHECK: encoding: [0x75,A]
+        jnz 0
+
+// rdar://8017515
+btq $0x01,%rdx
+// CHECK: btq	$1, %rdx
+// CHECK:  encoding: [0x48,0x0f,0xba,0xe2,0x01]
diff --git a/test/MC/AsmParser/X86/x86_64-suffix-matching.s b/test/MC/AsmParser/X86/x86_64-suffix-matching.s
index c4f0be2..0ae6fe5 100644
--- a/test/MC/AsmParser/X86/x86_64-suffix-matching.s
+++ b/test/MC/AsmParser/X86/x86_64-suffix-matching.s
@@ -4,3 +4,7 @@
         add $0, %eax
 // CHECK: addb $255, %al
         add $0xFF, %al
+// CHECK: orq %rax, %rdx
+        or %rax, %rdx
+// CHECK: shlq $3, %rax
+        shl $3, %rax
diff --git a/test/MC/AsmParser/X86/x86_instructions.s b/test/MC/AsmParser/X86/x86_instructions.s
index b558c2e..4bc8a4b 100644
--- a/test/MC/AsmParser/X86/x86_instructions.s
+++ b/test/MC/AsmParser/X86/x86_instructions.s
@@ -143,3 +143,11 @@
 fadd %st(0)
 fadd %st(1)
 fadd %st(7)
+
+// CHECK: leal 0, %eax
+        leal 0, %eax
+
+// rdar://7986634 - Insensitivity on opcodes.
+// CHECK: int3
+INT3
+
diff --git a/test/MC/AsmParser/assignment.s b/test/MC/AsmParser/assignment.s
index 882fae8..73ce860 100644
--- a/test/MC/AsmParser/assignment.s
+++ b/test/MC/AsmParser/assignment.s
@@ -2,6 +2,10 @@
 
 # CHECK: TEST0:
 # CHECK: a = 0
-TEST0:  
+TEST0:
         a = 0
-        
+
+# CHECK: .globl	_f1
+# CHECK: _f1 = 0
+        .globl _f1
+        _f1 = 0
diff --git a/test/MC/AsmParser/directive_tbss.s b/test/MC/AsmParser/directive_tbss.s
new file mode 100644
index 0000000..62d7123
--- /dev/null
+++ b/test/MC/AsmParser/directive_tbss.s
@@ -0,0 +1,7 @@
+# RUN: llvm-mc -triple x86_64-unknown-darwin %s | FileCheck %s
+
+# CHECK: .tbss _a$tlv$init, 4
+# CHECK: .tbss _b$tlv$init, 4, 3
+
+.tbss _a$tlv$init, 4
+.tbss _b$tlv$init, 4, 3
diff --git a/test/MC/AsmParser/directive_tdata.s b/test/MC/AsmParser/directive_tdata.s
new file mode 100644
index 0000000..240bef0
--- /dev/null
+++ b/test/MC/AsmParser/directive_tdata.s
@@ -0,0 +1,9 @@
+# RUN: llvm-mc -triple x86_64-unknown-darwin %s | FileCheck %s
+
+# CHECK:	__DATA,__thread_data,thread_local_regular
+# CHECK:	_a$tlv$init:
+# CHECK:	.quad 4
+
+	.tdata
+_a$tlv$init:
+	.quad 4
diff --git a/test/MC/AsmParser/directive_thread_init_func.s b/test/MC/AsmParser/directive_thread_init_func.s
new file mode 100644
index 0000000..4abd5bf
--- /dev/null
+++ b/test/MC/AsmParser/directive_thread_init_func.s
@@ -0,0 +1,7 @@
+# RUN: llvm-mc -triple x86_64-unknown-darwin %s | FileCheck %s
+
+# CHECK: __DATA,__thread_init,thread_local_init_function_pointers
+# CHECK: .quad 0
+
+.thread_init_func
+	.quad 0
diff --git a/test/MC/AsmParser/directive_tlv.s b/test/MC/AsmParser/directive_tlv.s
new file mode 100644
index 0000000..c4b3e10
--- /dev/null
+++ b/test/MC/AsmParser/directive_tlv.s
@@ -0,0 +1,13 @@
+# RUN: llvm-mc -triple x86_64-unknown-darwin %s | FileCheck %s
+
+# CHECK:	__DATA,__thread_vars,thread_local_variables
+# CHECK:	.globl _a
+# CHECK:	_a:
+# CHECK:	.quad 0
+
+	.tlv
+.globl _a
+_a:
+	.quad 0
+	.quad 0
+	.quad 0
diff --git a/test/MC/AsmParser/directive_zerofill.s b/test/MC/AsmParser/directive_zerofill.s
index 4b26f9b..207b8a9 100644
--- a/test/MC/AsmParser/directive_zerofill.s
+++ b/test/MC/AsmParser/directive_zerofill.s
@@ -4,7 +4,11 @@
 # CHECK: .zerofill __FOO,__bar,x,1
 # CHECK: .zerofill __FOO,__bar,y,8,2
 # CHECK: .zerofill __EMPTY,__NoSymbol
+# CHECK: .zerofill __DATA,__bss,"what you say?",8,3
 TEST0:  
 	.zerofill __FOO, __bar, x, 2-1
 	.zerofill __FOO,   __bar, y ,  8 , 1+1
 	.zerofill __EMPTY,__NoSymbol
+        
+        # rdar://7965971
+        .zerofill __DATA, __bss, "what you say?", 8, 3
diff --git a/test/MC/Disassembler/simple-tests.txt b/test/MC/Disassembler/simple-tests.txt
index 4155261..369aa9a 100644
--- a/test/MC/Disassembler/simple-tests.txt
+++ b/test/MC/Disassembler/simple-tests.txt
@@ -54,3 +54,6 @@
 
 # CHECK: movl $0, -4(%rbp)
 0xc7 0x45 0xfc 0x00 0x00 0x00 0x00
+
+# CHECK: movq	%cr0, %rcx
+0x0f 0x20 0xc1
diff --git a/test/MC/MachO/darwin-x86_64-reloc.s b/test/MC/MachO/darwin-x86_64-reloc.s
index d5e75d1..19b4345 100644
--- a/test/MC/MachO/darwin-x86_64-reloc.s
+++ b/test/MC/MachO/darwin-x86_64-reloc.s
@@ -1,7 +1,10 @@
-// RUN: llvm-mc -triple x86_64-apple-darwin9 %s -filetype=obj -o - | macho-dump --dump-section-data | FileCheck %s
+// RUN: llvm-mc -n -triple x86_64-apple-darwin9 %s -filetype=obj -o - | macho-dump --dump-section-data | FileCheck %s
 
 // These examples are taken from <mach-o/x86_64/reloc.h>.
 
+        .data
+        .long 0
+
         .text
 _foo:
         ret
@@ -42,36 +45,110 @@ L1:
 .long	_foobar@GOTPCREL+4
 .long	_foo@GOTPCREL+4
 
+        .section	__DWARF,__debug_frame,regular,debug
+        .quad L1
+        .quad _ext_foo
+
+// Make sure local label which overlaps with non-local one is assigned to the
+// right atom.
+        .text
+_f2:
+L2_0:
+        addl $0, %eax
+L2_1:        
+_f3:
+        addl L2_1 - L2_0, %eax
+        
+        .data
+L4:     
+        .long 0
+        .text
+        movl L4(%rip), %eax
+
+        .section __TEXT,__literal8,8byte_literals
+	.quad 0
+L5:     
+	.quad 0
+f6:
+        .quad 0
+L6:
+        .quad 0
+        
+        .text
+	movl L5(%rip), %eax
+	movl f6(%rip), %eax
+	movl L6(%rip), %eax
+        
+        .data
+        .quad L5
+        .quad f6
+	.quad L6
+
+        .text
+        cmpq $0, _foo@GOTPCREL(%rip)
+        
 // CHECK: ('cputype', 16777223)
 // CHECK: ('cpusubtype', 3)
 // CHECK: ('filetype', 1)
 // CHECK: ('num_load_commands', 1)
-// CHECK: ('load_commands_size', 336)
+// CHECK: ('load_commands_size', 496)
 // CHECK: ('flag', 0)
 // CHECK: ('reserved', 0)
 // CHECK: ('load_commands', [
 // CHECK:   # Load Command 0
 // CHECK:  (('command', 25)
-// CHECK:   ('size', 232)
+// CHECK:   ('size', 392)
 // CHECK:   ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
 // CHECK:   ('vm_addr', 0)
-// CHECK:   ('vm_size', 189)
-// CHECK:   ('file_offset', 368)
-// CHECK:   ('file_size', 189)
+// CHECK:   ('vm_size', 311)
+// CHECK:   ('file_offset', 528)
+// CHECK:   ('file_size', 311)
 // CHECK:   ('maxprot', 7)
 // CHECK:   ('initprot', 7)
-// CHECK:   ('num_sections', 2)
+// CHECK:   ('num_sections', 4)
 // CHECK:   ('flags', 0)
 // CHECK:   ('sections', [
 // CHECK:     # Section 0
+// CHECK:    (('section_name', '__data\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK:     ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK:     ('address', 0)
+// CHECK:     ('size', 40)
+// CHECK:     ('offset', 528)
+// CHECK:     ('alignment', 0)
+// CHECK:     ('reloc_offset', 840)
+// CHECK:     ('num_reloc', 5)
+// CHECK:     ('flags', 0x0)
+// CHECK:     ('reserved1', 0)
+// CHECK:     ('reserved2', 0)
+// CHECK:     ('reserved3', 0)
+// CHECK:    ),
+// CHECK:   ('_relocations', [
+// CHECK:     # Relocation 0
+// CHECK:     (('word-0', 0x20),
+// CHECK:      ('word-1', 0x6000004)),
+// CHECK:     # Relocation 1
+// CHECK:     (('word-0', 0x18),
+// CHECK:      ('word-1', 0xe000006)),
+// CHECK:     # Relocation 2
+// CHECK:     (('word-0', 0x10),
+// CHECK:      ('word-1', 0x6000004)),
+// CHECK:     # Relocation 3
+// CHECK:     (('word-0', 0x8),
+// CHECK:      ('word-1', 0x4d000000)),
+// CHECK:     # Relocation 4
+// CHECK:     (('word-0', 0x4),
+// CHECK:      ('word-1', 0x4d000008)),
+// CHECK:   ])
+// CHECK:   ('_section_data', '\x00\x00\x00\x00\x04\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x1f\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00/\x01\x00\x00\x00\x00\x00\x00')
+// CHECK:     # Section 1
 // CHECK:    (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
 // CHECK:     ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK:     ('address', 0)
-// CHECK:     ('size', 181)
-// CHECK:     ('offset', 368)
+// CHECK:     ('address', 40)
+// CHECK:     ('size', 223)
+// CHECK:     ('offset', 568)
 // CHECK:     ('alignment', 0)
-// CHECK:     ('reloc_offset', 560)
-// CHECK:     ('num_reloc', 27)
+// CHECK:     ('reloc_offset', 880)
+// CHECK:     ('num_reloc', 32)
 // CHECK:     ('flags', 0x80000400)
 // CHECK:     ('reserved1', 0)
 // CHECK:     ('reserved2', 0)
@@ -79,155 +156,219 @@ L1:
 // CHECK:    ),
 // CHECK:   ('_relocations', [
 // CHECK:     # Relocation 0
+// CHECK:     (('word-0', 0xda),
+// CHECK:      ('word-1', 0x4d000000)),
+// CHECK:     # Relocation 1
+// CHECK:     (('word-0', 0xd3),
+// CHECK:      ('word-1', 0x15000004)),
+// CHECK:     # Relocation 2
+// CHECK:     (('word-0', 0xcd),
+// CHECK:      ('word-1', 0x1d000006)),
+// CHECK:     # Relocation 3
+// CHECK:     (('word-0', 0xc7),
+// CHECK:      ('word-1', 0x15000004)),
+// CHECK:     # Relocation 4
+// CHECK:     (('word-0', 0xc1),
+// CHECK:      ('word-1', 0x15000001)),
+// CHECK:     # Relocation 5
 // CHECK:     (('word-0', 0xa5),
 // CHECK:      ('word-1', 0x5e000003)),
-// CHECK:     # Relocation 1
+// CHECK:     # Relocation 6
 // CHECK:     (('word-0', 0xa5),
 // CHECK:      ('word-1', 0xe000000)),
-// CHECK:     # Relocation 2
+// CHECK:     # Relocation 7
 // CHECK:     (('word-0', 0x9d),
 // CHECK:      ('word-1', 0x5e000003)),
-// CHECK:     # Relocation 3
+// CHECK:     # Relocation 8
 // CHECK:     (('word-0', 0x9d),
 // CHECK:      ('word-1', 0xe000000)),
-// CHECK:     # Relocation 4
+// CHECK:     # Relocation 9
 // CHECK:     (('word-0', 0x95),
 // CHECK:      ('word-1', 0xe000003)),
-// CHECK:     # Relocation 5
+// CHECK:     # Relocation 10
 // CHECK:     (('word-0', 0x8d),
 // CHECK:      ('word-1', 0xe000003)),
-// CHECK:     # Relocation 6
+// CHECK:     # Relocation 11
 // CHECK:     (('word-0', 0x79),
 // CHECK:      ('word-1', 0x8d000003)),
-// CHECK:     # Relocation 7
+// CHECK:     # Relocation 12
 // CHECK:     (('word-0', 0x71),
 // CHECK:      ('word-1', 0x7d000003)),
-// CHECK:     # Relocation 8
+// CHECK:     # Relocation 13
 // CHECK:     (('word-0', 0x69),
 // CHECK:      ('word-1', 0x6d000003)),
-// CHECK:     # Relocation 9
+// CHECK:     # Relocation 14
 // CHECK:     (('word-0', 0x63),
 // CHECK:      ('word-1', 0x1d000003)),
-// CHECK:     # Relocation 10
+// CHECK:     # Relocation 15
 // CHECK:     (('word-0', 0x5c),
 // CHECK:      ('word-1', 0x1d000003)),
-// CHECK:     # Relocation 11
+// CHECK:     # Relocation 16
 // CHECK:     (('word-0', 0x55),
 // CHECK:      ('word-1', 0x5c000002)),
-// CHECK:     # Relocation 12
+// CHECK:     # Relocation 17
 // CHECK:     (('word-0', 0x55),
 // CHECK:      ('word-1', 0xc000000)),
-// CHECK:     # Relocation 13
+// CHECK:     # Relocation 18
 // CHECK:     (('word-0', 0x4d),
 // CHECK:      ('word-1', 0x5e000002)),
-// CHECK:     # Relocation 14
+// CHECK:     # Relocation 19
 // CHECK:     (('word-0', 0x4d),
 // CHECK:      ('word-1', 0xe000000)),
-// CHECK:     # Relocation 15
+// CHECK:     # Relocation 20
 // CHECK:     (('word-0', 0x45),
 // CHECK:      ('word-1', 0x5e000002)),
-// CHECK:     # Relocation 16
+// CHECK:     # Relocation 21
 // CHECK:     (('word-0', 0x45),
 // CHECK:      ('word-1', 0xe000000)),
-// CHECK:     # Relocation 17
+// CHECK:     # Relocation 22
 // CHECK:     (('word-0', 0x3d),
 // CHECK:      ('word-1', 0xe000000)),
-// CHECK:     # Relocation 18
+// CHECK:     # Relocation 23
 // CHECK:     (('word-0', 0x35),
 // CHECK:      ('word-1', 0xe000000)),
-// CHECK:     # Relocation 19
+// CHECK:     # Relocation 24
 // CHECK:     (('word-0', 0x2d),
 // CHECK:      ('word-1', 0x8d000000)),
-// CHECK:     # Relocation 20
+// CHECK:     # Relocation 25
 // CHECK:     (('word-0', 0x26),
 // CHECK:      ('word-1', 0x6d000000)),
-// CHECK:     # Relocation 21
+// CHECK:     # Relocation 26
 // CHECK:     (('word-0', 0x20),
 // CHECK:      ('word-1', 0x1d000000)),
-// CHECK:     # Relocation 22
+// CHECK:     # Relocation 27
 // CHECK:     (('word-0', 0x1a),
 // CHECK:      ('word-1', 0x1d000000)),
-// CHECK:     # Relocation 23
+// CHECK:     # Relocation 28
 // CHECK:     (('word-0', 0x14),
 // CHECK:      ('word-1', 0x4d000000)),
-// CHECK:     # Relocation 24
+// CHECK:     # Relocation 29
 // CHECK:     (('word-0', 0xe),
 // CHECK:      ('word-1', 0x3d000000)),
-// CHECK:     # Relocation 25
+// CHECK:     # Relocation 30
 // CHECK:     (('word-0', 0x7),
 // CHECK:      ('word-1', 0x2d000000)),
-// CHECK:     # Relocation 26
+// CHECK:     # Relocation 31
 // CHECK:     (('word-0', 0x2),
 // CHECK:      ('word-1', 0x2d000000)),
 // CHECK:   ])
-// CHECK:   ('_section_data', '\xc3\xe8\x00\x00\x00\x00\xe8\x04\x00\x00\x00H\x8b\x05\x00\x00\x00\x00\xff5\x00\x00\x00\x00\x8b\x05\x00\x00\x00\x00\x8b\x05\x04\x00\x00\x00\xc6\x05\xff\xff\xff\xff\x12\xc7\x05\xfc\xff\xff\xffxV4\x12\x00\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00H\x8d\x05,\x00\x00\x00H\x8d\x05\x14\x00\x00\x00\x83\x05\x13\x00\x00\x00\x06f\x81\x05\x12\x00\x00\x00\xf4\x01\x81\x05\x10\x00\x00\x00\xf4\x01\x00\x00\x90\x90\x90\x90\x90\x90\x90\x90\x90\x90\x90\x90,\x00\x00\x00\x00\x00\x00\x00\x14\x00\x00\x00\x00\x00\x00\x00\xe4\xff\xff\xff\xff\xff\xff\xff\xd4\xff\xff\xff\xff\xff\xff\xff,\x00\x00\x00\x00\x00\x00\x00')
-// CHECK:     # Section 1
-// CHECK:    (('section_name', '__data\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK:     ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK:     ('address', 181)
-// CHECK:     ('size', 8)
-// CHECK:     ('offset', 549)
+// CHECK:   ('_section_data', '\xc3\xe8\x00\x00\x00\x00\xe8\x04\x00\x00\x00H\x8b\x05\x00\x00\x00\x00\xff5\x00\x00\x00\x00\x8b\x05\x00\x00\x00\x00\x8b\x05\x04\x00\x00\x00\xc6\x05\xff\xff\xff\xff\x12\xc7\x05\xfc\xff\xff\xffxV4\x12\x00\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00H\x8d\x05,\x00\x00\x00H\x8d\x05\x14\x00\x00\x00\x83\x05\x13\x00\x00\x00\x06f\x81\x05\x12\x00\x00\x00\xf4\x01\x81\x05\x10\x00\x00\x00\xf4\x01\x00\x00\x90\x90\x90\x90\x90\x90\x90\x90\x90\x90\x90\x90,\x00\x00\x00\x00\x00\x00\x00\x14\x00\x00\x00\x00\x00\x00\x00\xe4\xff\xff\xff\xff\xff\xff\xff\xd4\xff\xff\xff\xff\xff\xff\xff,\x00\x00\x00\x00\x00\x00\x00\x83\xc0\x00\x03\x04%\x03\x00\x00\x00\x8b\x05\x1f\xff\xff\xff\x8b\x05,\x00\x00\x00\x8b\x05\x00\x00\x00\x00\x8b\x050\x00\x00\x00H\x83=\xff\xff\xff\xff\x00')
+// CHECK:     # Section 2
+// CHECK:    (('section_name', '__debug_frame\x00\x00\x00')
+// CHECK:     ('segment_name', '__DWARF\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK:     ('address', 263)
+// CHECK:     ('size', 16)
+// CHECK:     ('offset', 791)
 // CHECK:     ('alignment', 0)
-// CHECK:     ('reloc_offset', 776)
+// CHECK:     ('reloc_offset', 1136)
 // CHECK:     ('num_reloc', 2)
-// CHECK:     ('flags', 0x0)
+// CHECK:     ('flags', 0x2000000)
 // CHECK:     ('reserved1', 0)
 // CHECK:     ('reserved2', 0)
 // CHECK:     ('reserved3', 0)
 // CHECK:    ),
 // CHECK:   ('_relocations', [
 // CHECK:     # Relocation 0
-// CHECK:     (('word-0', 0x4),
-// CHECK:      ('word-1', 0x4d000000)),
+// CHECK:     (('word-0', 0x8),
+// CHECK:      ('word-1', 0xe000007)),
 // CHECK:     # Relocation 1
 // CHECK:     (('word-0', 0x0),
-// CHECK:      ('word-1', 0x4d000004)),
+// CHECK:      ('word-1', 0x6000002)),
 // CHECK:   ])
-// CHECK:   ('_section_data', '\x04\x00\x00\x00\x04\x00\x00\x00')
+// CHECK:   ('_section_data', '\xd5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK:     # Section 3
+// CHECK:    (('section_name', '__literal8\x00\x00\x00\x00\x00\x00')
+// CHECK:     ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK:     ('address', 279)
+// CHECK:     ('size', 32)
+// CHECK:     ('offset', 807)
+// CHECK:     ('alignment', 0)
+// CHECK:     ('reloc_offset', 0)
+// CHECK:     ('num_reloc', 0)
+// CHECK:     ('flags', 0x4)
+// CHECK:     ('reserved1', 0)
+// CHECK:     ('reserved2', 0)
+// CHECK:     ('reserved3', 0)
+// CHECK:    ),
+// CHECK:   ('_relocations', [
+// CHECK:   ])
+// CHECK:   ('_section_data', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
 // CHECK:   ])
 // CHECK:  ),
 // CHECK:   # Load Command 1
 // CHECK:  (('command', 2)
 // CHECK:   ('size', 24)
-// CHECK:   ('symoff', 792)
-// CHECK:   ('nsyms', 5)
-// CHECK:   ('stroff', 872)
-// CHECK:   ('strsize', 32)
-// CHECK:   ('_string_data', '\x00_foobar\x00_foo\x00_baz\x00_bar\x00_prev\x00\x00\x00')
+// CHECK:   ('symoff', 1152)
+// CHECK:   ('nsyms', 9)
+// CHECK:   ('stroff', 1296)
+// CHECK:   ('strsize', 52)
+// CHECK:   ('_string_data', '\x00_foobar\x00_ext_foo\x00_foo\x00_baz\x00_bar\x00_prev\x00_f2\x00_f3\x00f6\x00\x00\x00')
 // CHECK:   ('_symbols', [
 // CHECK:     # Symbol 0
-// CHECK:    (('n_strx', 9)
+// CHECK:    (('n_strx', 18)
 // CHECK:     ('n_type', 0xe)
-// CHECK:     ('n_sect', 1)
+// CHECK:     ('n_sect', 2)
 // CHECK:     ('n_desc', 0)
-// CHECK:     ('n_value', 0)
+// CHECK:     ('n_value', 40)
 // CHECK:     ('_string', '_foo')
 // CHECK:    ),
 // CHECK:     # Symbol 1
-// CHECK:    (('n_strx', 14)
+// CHECK:    (('n_strx', 23)
 // CHECK:     ('n_type', 0xe)
-// CHECK:     ('n_sect', 1)
+// CHECK:     ('n_sect', 2)
 // CHECK:     ('n_desc', 0)
-// CHECK:     ('n_value', 1)
+// CHECK:     ('n_value', 41)
 // CHECK:     ('_string', '_baz')
 // CHECK:    ),
 // CHECK:     # Symbol 2
-// CHECK:    (('n_strx', 19)
+// CHECK:    (('n_strx', 28)
 // CHECK:     ('n_type', 0xe)
-// CHECK:     ('n_sect', 1)
+// CHECK:     ('n_sect', 2)
 // CHECK:     ('n_desc', 0)
-// CHECK:     ('n_value', 61)
+// CHECK:     ('n_value', 101)
 // CHECK:     ('_string', '_bar')
 // CHECK:    ),
 // CHECK:     # Symbol 3
-// CHECK:    (('n_strx', 24)
+// CHECK:    (('n_strx', 33)
 // CHECK:     ('n_type', 0xe)
-// CHECK:     ('n_sect', 1)
+// CHECK:     ('n_sect', 2)
 // CHECK:     ('n_desc', 0)
-// CHECK:     ('n_value', 129)
+// CHECK:     ('n_value', 169)
 // CHECK:     ('_string', '_prev')
 // CHECK:    ),
 // CHECK:     # Symbol 4
+// CHECK:    (('n_strx', 39)
+// CHECK:     ('n_type', 0xe)
+// CHECK:     ('n_sect', 2)
+// CHECK:     ('n_desc', 0)
+// CHECK:     ('n_value', 221)
+// CHECK:     ('_string', '_f2')
+// CHECK:    ),
+// CHECK:     # Symbol 5
+// CHECK:    (('n_strx', 43)
+// CHECK:     ('n_type', 0xe)
+// CHECK:     ('n_sect', 2)
+// CHECK:     ('n_desc', 0)
+// CHECK:     ('n_value', 224)
+// CHECK:     ('_string', '_f3')
+// CHECK:    ),
+// CHECK:     # Symbol 6
+// CHECK:    (('n_strx', 47)
+// CHECK:     ('n_type', 0xe)
+// CHECK:     ('n_sect', 4)
+// CHECK:     ('n_desc', 0)
+// CHECK:     ('n_value', 295)
+// CHECK:     ('_string', 'f6')
+// CHECK:    ),
+// CHECK:     # Symbol 7
+// CHECK:    (('n_strx', 9)
+// CHECK:     ('n_type', 0x1)
+// CHECK:     ('n_sect', 0)
+// CHECK:     ('n_desc', 0)
+// CHECK:     ('n_value', 0)
+// CHECK:     ('_string', '_ext_foo')
+// CHECK:    ),
+// CHECK:     # Symbol 8
 // CHECK:    (('n_strx', 1)
 // CHECK:     ('n_type', 0x1)
 // CHECK:     ('n_sect', 0)
@@ -241,11 +382,11 @@ L1:
 // CHECK:  (('command', 11)
 // CHECK:   ('size', 80)
 // CHECK:   ('ilocalsym', 0)
-// CHECK:   ('nlocalsym', 4)
-// CHECK:   ('iextdefsym', 4)
+// CHECK:   ('nlocalsym', 7)
+// CHECK:   ('iextdefsym', 7)
 // CHECK:   ('nextdefsym', 0)
-// CHECK:   ('iundefsym', 4)
-// CHECK:   ('nundefsym', 1)
+// CHECK:   ('iundefsym', 7)
+// CHECK:   ('nundefsym', 2)
 // CHECK:   ('tocoff', 0)
 // CHECK:   ('ntoc', 0)
 // CHECK:   ('modtaboff', 0)
diff --git a/test/MC/MachO/direction_labels.s b/test/MC/MachO/direction_labels.s
new file mode 100644
index 0000000..5a68f7f
--- /dev/null
+++ b/test/MC/MachO/direction_labels.s
@@ -0,0 +1,95 @@
+// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | macho-dump --dump-section-data | FileCheck %s
+
+direction_labels:
+10:     nop
+        jmp 10b
+        nop
+	jne 0f
+0:	nop
+	jne 0b
+        jmp 11f
+11:     nop
+        ret
+
+// CHECK: ('cputype', 7)
+// CHECK: ('cpusubtype', 3)
+// CHECK: ('filetype', 1)
+// CHECK: ('num_load_commands', 1)
+// CHECK: ('load_commands_size', 228)
+// CHECK: ('flag', 0)
+// CHECK: ('load_commands', [
+// CHECK:   # Load Command 0
+// CHECK:  (('command', 1)
+// CHECK:   ('size', 124)
+// CHECK:   ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK:   ('vm_addr', 0)
+// CHECK:   ('vm_size', 13)
+// CHECK:   ('file_offset', 256)
+// CHECK:   ('file_size', 13)
+// CHECK:   ('maxprot', 7)
+// CHECK:   ('initprot', 7)
+// CHECK:   ('num_sections', 1)
+// CHECK:   ('flags', 0)
+// CHECK:   ('sections', [
+// CHECK:     # Section 0
+// CHECK:    (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK:     ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK:     ('address', 0)
+// CHECK:     ('size', 13)
+// CHECK:     ('offset', 256)
+// CHECK:     ('alignment', 0)
+// CHECK:     ('reloc_offset', 0)
+// CHECK:     ('num_reloc', 0)
+// CHECK:     ('flags', 0x80000400)
+// CHECK:     ('reserved1', 0)
+// CHECK:     ('reserved2', 0)
+// CHECK:    ),
+// CHECK:   ('_relocations', [
+// CHECK:   ])
+// CHECK:   ('_section_data', '\x90\xeb\xfd\x90u\x00\x90u\xfd\xeb\x00\x90\xc3')
+// CHECK:   ])
+// CHECK:  ),
+// CHECK:   # Load Command 1
+// CHECK:  (('command', 2)
+// CHECK:   ('size', 24)
+// CHECK:   ('symoff', 272)
+// CHECK:   ('nsyms', 1)
+// CHECK:   ('stroff', 284)
+// CHECK:   ('strsize', 20)
+// CHECK:   ('_string_data', '\x00direction_labels\x00\x00\x00')
+// CHECK:   ('_symbols', [
+// CHECK:     # Symbol 0
+// CHECK:    (('n_strx', 1)
+// CHECK:     ('n_type', 0xe)
+// CHECK:     ('n_sect', 1)
+// CHECK:     ('n_desc', 0)
+// CHECK:     ('n_value', 0)
+// CHECK:     ('_string', 'direction_labels')
+// CHECK:    ),
+// CHECK:   ])
+// CHECK:  ),
+// CHECK:   # Load Command 2
+// CHECK:  (('command', 11)
+// CHECK:   ('size', 80)
+// CHECK:   ('ilocalsym', 0)
+// CHECK:   ('nlocalsym', 1)
+// CHECK:   ('iextdefsym', 1)
+// CHECK:   ('nextdefsym', 0)
+// CHECK:   ('iundefsym', 1)
+// CHECK:   ('nundefsym', 0)
+// CHECK:   ('tocoff', 0)
+// CHECK:   ('ntoc', 0)
+// CHECK:   ('modtaboff', 0)
+// CHECK:   ('nmodtab', 0)
+// CHECK:   ('extrefsymoff', 0)
+// CHECK:   ('nextrefsyms', 0)
+// CHECK:   ('indirectsymoff', 0)
+// CHECK:   ('nindirectsyms', 0)
+// CHECK:   ('extreloff', 0)
+// CHECK:   ('nextrel', 0)
+// CHECK:   ('locreloff', 0)
+// CHECK:   ('nlocrel', 0)
+// CHECK:   ('_indirect_symbols', [
+// CHECK:   ])
+// CHECK:  ),
+// CHECK: ])
diff --git a/test/MC/MachO/indirect-symbols.s b/test/MC/MachO/indirect-symbols.s
new file mode 100644
index 0000000..45a62f6
--- /dev/null
+++ b/test/MC/MachO/indirect-symbols.s
@@ -0,0 +1,188 @@
+// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | macho-dump --dump-section-data | FileCheck %s
+
+_b:
+        _c = 0
+_e:
+        _f = 0
+        
+	.section	__IMPORT,__jump_table,symbol_stubs,pure_instructions+self_modifying_code,5
+.indirect_symbol _a
+	.ascii	 "\364\364\364\364\364"        
+.indirect_symbol _b
+	.ascii	 "\364\364\364\364\364"        
+.indirect_symbol _c
+	.ascii	 "\364\364\364\364\364"        
+	.section	__IMPORT,__pointers,non_lazy_symbol_pointers
+.indirect_symbol _d
+	.long	0
+.indirect_symbol _e
+	.long	0
+.indirect_symbol _f
+	.long	0
+
+// CHECK: ('cputype', 7)
+// CHECK: ('cpusubtype', 3)
+// CHECK: ('filetype', 1)
+// CHECK: ('num_load_commands', 1)
+// CHECK: ('load_commands_size', 364)
+// CHECK: ('flag', 0)
+// CHECK: ('load_commands', [
+// CHECK:   # Load Command 0
+// CHECK:  (('command', 1)
+// CHECK:   ('size', 260)
+// CHECK:   ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK:   ('vm_addr', 0)
+// CHECK:   ('vm_size', 27)
+// CHECK:   ('file_offset', 392)
+// CHECK:   ('file_size', 27)
+// CHECK:   ('maxprot', 7)
+// CHECK:   ('initprot', 7)
+// CHECK:   ('num_sections', 3)
+// CHECK:   ('flags', 0)
+// CHECK:   ('sections', [
+// CHECK:     # Section 0
+// CHECK:    (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK:     ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK:     ('address', 0)
+// CHECK:     ('size', 0)
+// CHECK:     ('offset', 392)
+// CHECK:     ('alignment', 0)
+// CHECK:     ('reloc_offset', 0)
+// CHECK:     ('num_reloc', 0)
+// CHECK:     ('flags', 0x80000000)
+// CHECK:     ('reserved1', 0)
+// CHECK:     ('reserved2', 0)
+// CHECK:    ),
+// CHECK:   ('_relocations', [
+// CHECK:   ])
+// CHECK:   ('_section_data', '')
+// CHECK:     # Section 1
+// CHECK:    (('section_name', '__jump_table\x00\x00\x00\x00')
+// CHECK:     ('segment_name', '__IMPORT\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK:     ('address', 0)
+// CHECK:     ('size', 15)
+// CHECK:     ('offset', 392)
+// CHECK:     ('alignment', 0)
+// CHECK:     ('reloc_offset', 0)
+// CHECK:     ('num_reloc', 0)
+// CHECK:     ('flags', 0x84000008)
+// CHECK:     ('reserved1', 0)
+// CHECK:     ('reserved2', 5)
+// CHECK:    ),
+// CHECK:   ('_relocations', [
+// CHECK:   ])
+// CHECK:   ('_section_data', '\xf4\xf4\xf4\xf4\xf4\xf4\xf4\xf4\xf4\xf4\xf4\xf4\xf4\xf4\xf4')
+// CHECK:     # Section 2
+// CHECK:    (('section_name', '__pointers\x00\x00\x00\x00\x00\x00')
+// CHECK:     ('segment_name', '__IMPORT\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK:     ('address', 15)
+// CHECK:     ('size', 12)
+// CHECK:     ('offset', 407)
+// CHECK:     ('alignment', 0)
+// CHECK:     ('reloc_offset', 0)
+// CHECK:     ('num_reloc', 0)
+// CHECK:     ('flags', 0x6)
+// CHECK:     ('reserved1', 3)
+// CHECK:     ('reserved2', 0)
+// CHECK:    ),
+// CHECK:   ('_relocations', [
+// CHECK:   ])
+// CHECK:   ('_section_data', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK:   ])
+// CHECK:  ),
+// CHECK:   # Load Command 1
+// CHECK:  (('command', 2)
+// CHECK:   ('size', 24)
+// CHECK:   ('symoff', 444)
+// CHECK:   ('nsyms', 6)
+// CHECK:   ('stroff', 516)
+// CHECK:   ('strsize', 20)
+// CHECK:   ('_string_data', '\x00_d\x00_a\x00_b\x00_c\x00_e\x00_f\x00\x00')
+// CHECK:   ('_symbols', [
+// CHECK:     # Symbol 0
+// CHECK:    (('n_strx', 7)
+// CHECK:     ('n_type', 0xe)
+// CHECK:     ('n_sect', 1)
+// CHECK:     ('n_desc', 0)
+// CHECK:     ('n_value', 0)
+// CHECK:     ('_string', '_b')
+// CHECK:    ),
+// CHECK:     # Symbol 1
+// CHECK:    (('n_strx', 10)
+// CHECK:     ('n_type', 0x2)
+// CHECK:     ('n_sect', 0)
+// CHECK:     ('n_desc', 0)
+// CHECK:     ('n_value', 0)
+// CHECK:     ('_string', '_c')
+// CHECK:    ),
+// CHECK:     # Symbol 2
+// CHECK:    (('n_strx', 13)
+// CHECK:     ('n_type', 0xe)
+// CHECK:     ('n_sect', 1)
+// CHECK:     ('n_desc', 0)
+// CHECK:     ('n_value', 0)
+// CHECK:     ('_string', '_e')
+// CHECK:    ),
+// CHECK:     # Symbol 3
+// CHECK:    (('n_strx', 16)
+// CHECK:     ('n_type', 0x2)
+// CHECK:     ('n_sect', 0)
+// CHECK:     ('n_desc', 0)
+// CHECK:     ('n_value', 0)
+// CHECK:     ('_string', '_f')
+// CHECK:    ),
+// CHECK:     # Symbol 4
+// CHECK:    (('n_strx', 4)
+// CHECK:     ('n_type', 0x1)
+// CHECK:     ('n_sect', 0)
+// CHECK:     ('n_desc', 1)
+// CHECK:     ('n_value', 0)
+// CHECK:     ('_string', '_a')
+// CHECK:    ),
+// CHECK:     # Symbol 5
+// CHECK:    (('n_strx', 1)
+// CHECK:     ('n_type', 0x1)
+// CHECK:     ('n_sect', 0)
+// CHECK:     ('n_desc', 0)
+// CHECK:     ('n_value', 0)
+// CHECK:     ('_string', '_d')
+// CHECK:    ),
+// CHECK:   ])
+// CHECK:  ),
+// CHECK:   # Load Command 2
+// CHECK:  (('command', 11)
+// CHECK:   ('size', 80)
+// CHECK:   ('ilocalsym', 0)
+// CHECK:   ('nlocalsym', 4)
+// CHECK:   ('iextdefsym', 4)
+// CHECK:   ('nextdefsym', 0)
+// CHECK:   ('iundefsym', 4)
+// CHECK:   ('nundefsym', 2)
+// CHECK:   ('tocoff', 0)
+// CHECK:   ('ntoc', 0)
+// CHECK:   ('modtaboff', 0)
+// CHECK:   ('nmodtab', 0)
+// CHECK:   ('extrefsymoff', 0)
+// CHECK:   ('nextrefsyms', 0)
+// CHECK:   ('indirectsymoff', 420)
+// CHECK:   ('nindirectsyms', 6)
+// CHECK:   ('extreloff', 0)
+// CHECK:   ('nextrel', 0)
+// CHECK:   ('locreloff', 0)
+// CHECK:   ('nlocrel', 0)
+// CHECK:   ('_indirect_symbols', [
+// CHECK:     # Indirect Symbol 0
+// CHECK:     (('symbol_index', 0x4),),
+// CHECK:     # Indirect Symbol 1
+// CHECK:     (('symbol_index', 0x0),),
+// CHECK:     # Indirect Symbol 2
+// CHECK:     (('symbol_index', 0x1),),
+// CHECK:     # Indirect Symbol 3
+// CHECK:     (('symbol_index', 0x5),),
+// CHECK:     # Indirect Symbol 4
+// CHECK:     (('symbol_index', 0x80000000),),
+// CHECK:     # Indirect Symbol 5
+// CHECK:     (('symbol_index', 0xc0000000),),
+// CHECK:   ])
+// CHECK:  ),
+// CHECK: ])
diff --git a/test/MC/MachO/reloc.s b/test/MC/MachO/reloc.s
index c305eeb..f91f425 100644
--- a/test/MC/MachO/reloc.s
+++ b/test/MC/MachO/reloc.s
@@ -41,6 +41,17 @@ L1:
         jmp L1
         ret
 
+        .objc_class_name_A=0
+	.globl .objc_class_name_A
+
+        .text
+        .globl _f1
+        .weak_definition _f1
+_f1:
+        .data
+        .long _f1
+        .long _f1 + 4
+
 // CHECK: ('cputype', 7)
 // CHECK: ('cpusubtype', 3)
 // CHECK: ('filetype', 1)
@@ -53,9 +64,9 @@ L1:
 // CHECK:   ('size', 260)
 // CHECK:   ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
 // CHECK:   ('vm_addr', 0)
-// CHECK:   ('vm_size', 63)
+// CHECK:   ('vm_size', 71)
 // CHECK:   ('file_offset', 392)
-// CHECK:   ('file_size', 63)
+// CHECK:   ('file_size', 71)
 // CHECK:   ('maxprot', 7)
 // CHECK:   ('initprot', 7)
 // CHECK:   ('num_sections', 3)
@@ -68,7 +79,7 @@ L1:
 // CHECK:     ('size', 8)
 // CHECK:     ('offset', 392)
 // CHECK:     ('alignment', 0)
-// CHECK:     ('reloc_offset', 456)
+// CHECK:     ('reloc_offset', 464)
 // CHECK:     ('num_reloc', 1)
 // CHECK:     ('flags', 0x80000400)
 // CHECK:     ('reserved1', 0)
@@ -79,58 +90,64 @@ L1:
 // CHECK:     (('word-0', 0x1),
 // CHECK:      ('word-1', 0x5000003)),
 // CHECK:   ])
-// CHECK:   ('_section_data', '\xe92\x00\x00\x00\xeb\xf9\xc3')
+// CHECK:   ('_section_data', '\xe9:\x00\x00\x00\xeb\xf9\xc3')
 // CHECK:     # Section 1
 // CHECK:    (('section_name', '__data\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
 // CHECK:     ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
 // CHECK:     ('address', 8)
-// CHECK:     ('size', 43)
+// CHECK:     ('size', 51)
 // CHECK:     ('offset', 400)
 // CHECK:     ('alignment', 0)
-// CHECK:     ('reloc_offset', 464)
-// CHECK:     ('num_reloc', 9)
+// CHECK:     ('reloc_offset', 472)
+// CHECK:     ('num_reloc', 11)
 // CHECK:     ('flags', 0x0)
 // CHECK:     ('reserved1', 0)
 // CHECK:     ('reserved2', 0)
 // CHECK:    ),
 // CHECK:   ('_relocations', [
 // CHECK:     # Relocation 0
+// CHECK:     (('word-0', 0x2f),
+// CHECK:      ('word-1', 0xc000007)),
+// CHECK:     # Relocation 1
+// CHECK:     (('word-0', 0x2b),
+// CHECK:      ('word-1', 0xc000007)),
+// CHECK:     # Relocation 2
 // CHECK:     (('word-0', 0x8000002a),
 // CHECK:      ('word-1', 0x18)),
-// CHECK:     # Relocation 1
+// CHECK:     # Relocation 3
 // CHECK:     (('word-0', 0x90000028),
 // CHECK:      ('word-1', 0x18)),
-// CHECK:     # Relocation 2
+// CHECK:     # Relocation 4
 // CHECK:     (('word-0', 0xa0000024),
 // CHECK:      ('word-1', 0x18)),
-// CHECK:     # Relocation 3
+// CHECK:     # Relocation 5
 // CHECK:     (('word-0', 0xa0000020),
 // CHECK:      ('word-1', 0x18)),
-// CHECK:     # Relocation 4
+// CHECK:     # Relocation 6
 // CHECK:     (('word-0', 0xa4000014),
 // CHECK:      ('word-1', 0x1c)),
-// CHECK:     # Relocation 5
+// CHECK:     # Relocation 7
 // CHECK:     (('word-0', 0xa1000000),
 // CHECK:      ('word-1', 0x24)),
-// CHECK:     # Relocation 6
+// CHECK:     # Relocation 8
 // CHECK:     (('word-0', 0x8),
 // CHECK:      ('word-1', 0x4000002)),
-// CHECK:     # Relocation 7
+// CHECK:     # Relocation 9
 // CHECK:     (('word-0', 0x4),
-// CHECK:      ('word-1', 0xc000007)),
-// CHECK:     # Relocation 8
+// CHECK:      ('word-1', 0xc000009)),
+// CHECK:     # Relocation 10
 // CHECK:     (('word-0', 0x0),
-// CHECK:      ('word-1', 0xc000007)),
+// CHECK:      ('word-1', 0xc000009)),
 // CHECK:   ])
-// CHECK:   ('_section_data', '\x00\x00\x00\x00\x04\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xed\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x19\x00\x00\x00"\x00\x00\x00,\x00q')
+// CHECK:   ('_section_data', '\x00\x00\x00\x00\x04\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xed\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x19\x00\x00\x00"\x00\x00\x00,\x00q\x00\x00\x00\x00\x04\x00\x00\x00')
 // CHECK:     # Section 2
 // CHECK:    (('section_name', '__const\x00\x00\x00\x00\x00\x00\x00\x00\x00')
 // CHECK:     ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK:     ('address', 51)
+// CHECK:     ('address', 59)
 // CHECK:     ('size', 12)
-// CHECK:     ('offset', 443)
+// CHECK:     ('offset', 451)
 // CHECK:     ('alignment', 0)
-// CHECK:     ('reloc_offset', 536)
+// CHECK:     ('reloc_offset', 560)
 // CHECK:     ('num_reloc', 4)
 // CHECK:     ('flags', 0x0)
 // CHECK:     ('reserved1', 0)
@@ -148,22 +165,22 @@ L1:
 // CHECK:      ('word-1', 0x18)),
 // CHECK:     # Relocation 3
 // CHECK:     (('word-0', 0xa1000000),
-// CHECK:      ('word-1', 0x33)),
+// CHECK:      ('word-1', 0x3b)),
 // CHECK:   ])
-// CHECK:   ('_section_data', '\x06\x00\x00\x007\x00\x00\x00\x00\x00\x00\x00')
+// CHECK:   ('_section_data', '\xfe\xff\xff\xff?\x00\x00\x00\x00\x00\x00\x00')
 // CHECK:   ])
 // CHECK:  ),
 // CHECK:   # Load Command 1
 // CHECK:  (('command', 2)
 // CHECK:   ('size', 24)
-// CHECK:   ('symoff', 568)
-// CHECK:   ('nsyms', 8)
-// CHECK:   ('stroff', 664)
-// CHECK:   ('strsize', 64)
-// CHECK:   ('_string_data', '\x00undef\x00local_a_ext\x00local_a\x00local_a_elt\x00local_b\x00local_c\x00bar\x00_f0\x00\x00')
+// CHECK:   ('symoff', 592)
+// CHECK:   ('nsyms', 10)
+// CHECK:   ('stroff', 712)
+// CHECK:   ('strsize', 88)
+// CHECK:   ('_string_data', '\x00undef\x00local_a_ext\x00.objc_class_name_A\x00_f1\x00local_a\x00local_a_elt\x00local_b\x00local_c\x00bar\x00_f0\x00\x00\x00')
 // CHECK:   ('_symbols', [
 // CHECK:     # Symbol 0
-// CHECK:    (('n_strx', 19)
+// CHECK:    (('n_strx', 42)
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 2)
 // CHECK:     ('n_desc', 0)
@@ -171,7 +188,7 @@ L1:
 // CHECK:     ('_string', 'local_a')
 // CHECK:    ),
 // CHECK:     # Symbol 1
-// CHECK:    (('n_strx', 27)
+// CHECK:    (('n_strx', 50)
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 2)
 // CHECK:     ('n_desc', 0)
@@ -179,7 +196,7 @@ L1:
 // CHECK:     ('_string', 'local_a_elt')
 // CHECK:    ),
 // CHECK:     # Symbol 2
-// CHECK:    (('n_strx', 39)
+// CHECK:    (('n_strx', 62)
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 2)
 // CHECK:     ('n_desc', 0)
@@ -187,7 +204,7 @@ L1:
 // CHECK:     ('_string', 'local_b')
 // CHECK:    ),
 // CHECK:     # Symbol 3
-// CHECK:    (('n_strx', 47)
+// CHECK:    (('n_strx', 70)
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 2)
 // CHECK:     ('n_desc', 0)
@@ -195,15 +212,15 @@ L1:
 // CHECK:     ('_string', 'local_c')
 // CHECK:    ),
 // CHECK:     # Symbol 4
-// CHECK:    (('n_strx', 55)
+// CHECK:    (('n_strx', 78)
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 3)
 // CHECK:     ('n_desc', 0)
-// CHECK:     ('n_value', 51)
+// CHECK:     ('n_value', 59)
 // CHECK:     ('_string', 'bar')
 // CHECK:    ),
 // CHECK:     # Symbol 5
-// CHECK:    (('n_strx', 59)
+// CHECK:    (('n_strx', 82)
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 1)
 // CHECK:     ('n_desc', 0)
@@ -211,6 +228,22 @@ L1:
 // CHECK:     ('_string', '_f0')
 // CHECK:    ),
 // CHECK:     # Symbol 6
+// CHECK:    (('n_strx', 19)
+// CHECK:     ('n_type', 0x3)
+// CHECK:     ('n_sect', 0)
+// CHECK:     ('n_desc', 0)
+// CHECK:     ('n_value', 0)
+// CHECK:     ('_string', '.objc_class_name_A')
+// CHECK:    ),
+// CHECK:     # Symbol 7
+// CHECK:    (('n_strx', 38)
+// CHECK:     ('n_type', 0xf)
+// CHECK:     ('n_sect', 1)
+// CHECK:     ('n_desc', 128)
+// CHECK:     ('n_value', 8)
+// CHECK:     ('_string', '_f1')
+// CHECK:    ),
+// CHECK:     # Symbol 8
 // CHECK:    (('n_strx', 7)
 // CHECK:     ('n_type', 0xf)
 // CHECK:     ('n_sect', 2)
@@ -218,7 +251,7 @@ L1:
 // CHECK:     ('n_value', 16)
 // CHECK:     ('_string', 'local_a_ext')
 // CHECK:    ),
-// CHECK:     # Symbol 7
+// CHECK:     # Symbol 9
 // CHECK:    (('n_strx', 1)
 // CHECK:     ('n_type', 0x1)
 // CHECK:     ('n_sect', 0)
@@ -234,8 +267,8 @@ L1:
 // CHECK:   ('ilocalsym', 0)
 // CHECK:   ('nlocalsym', 6)
 // CHECK:   ('iextdefsym', 6)
-// CHECK:   ('nextdefsym', 1)
-// CHECK:   ('iundefsym', 7)
+// CHECK:   ('nextdefsym', 3)
+// CHECK:   ('iundefsym', 9)
 // CHECK:   ('nundefsym', 1)
 // CHECK:   ('tocoff', 0)
 // CHECK:   ('ntoc', 0)
diff --git a/test/MC/MachO/string-table.s b/test/MC/MachO/string-table.s
new file mode 100644
index 0000000..b811a0b
--- /dev/null
+++ b/test/MC/MachO/string-table.s
@@ -0,0 +1,100 @@
+// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | macho-dump --dump-section-data | FileCheck %s
+        
+	movl	$a, b
+        
+// CHECK: ('cputype', 7)
+// CHECK: ('cpusubtype', 3)
+// CHECK: ('filetype', 1)
+// CHECK: ('num_load_commands', 1)
+// CHECK: ('load_commands_size', 228)
+// CHECK: ('flag', 0)
+// CHECK: ('load_commands', [
+// CHECK:   # Load Command 0
+// CHECK:  (('command', 1)
+// CHECK:   ('size', 124)
+// CHECK:   ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK:   ('vm_addr', 0)
+// CHECK:   ('vm_size', 10)
+// CHECK:   ('file_offset', 256)
+// CHECK:   ('file_size', 10)
+// CHECK:   ('maxprot', 7)
+// CHECK:   ('initprot', 7)
+// CHECK:   ('num_sections', 1)
+// CHECK:   ('flags', 0)
+// CHECK:   ('sections', [
+// CHECK:     # Section 0
+// CHECK:    (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK:     ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK:     ('address', 0)
+// CHECK:     ('size', 10)
+// CHECK:     ('offset', 256)
+// CHECK:     ('alignment', 0)
+// CHECK:     ('reloc_offset', 268)
+// CHECK:     ('num_reloc', 2)
+// CHECK:     ('flags', 0x80000400)
+// CHECK:     ('reserved1', 0)
+// CHECK:     ('reserved2', 0)
+// CHECK:    ),
+// CHECK:   ('_relocations', [
+// CHECK:     # Relocation 0
+// CHECK:     (('word-0', 0x6),
+// CHECK:      ('word-1', 0xc000000)),
+// CHECK:     # Relocation 1
+// CHECK:     (('word-0', 0x2),
+// CHECK:      ('word-1', 0xc000001)),
+// CHECK:   ])
+// CHECK:   ('_section_data', '\xc7\x05\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK:   ])
+// CHECK:  ),
+// CHECK:   # Load Command 1
+// CHECK:  (('command', 2)
+// CHECK:   ('size', 24)
+// CHECK:   ('symoff', 284)
+// CHECK:   ('nsyms', 2)
+// CHECK:   ('stroff', 308)
+// CHECK:   ('strsize', 8)
+// CHECK:   ('_string_data', '\x00a\x00b\x00\x00\x00\x00')
+// CHECK:   ('_symbols', [
+// CHECK:     # Symbol 0
+// CHECK:    (('n_strx', 1)
+// CHECK:     ('n_type', 0x1)
+// CHECK:     ('n_sect', 0)
+// CHECK:     ('n_desc', 0)
+// CHECK:     ('n_value', 0)
+// CHECK:     ('_string', 'a')
+// CHECK:    ),
+// CHECK:     # Symbol 1
+// CHECK:    (('n_strx', 3)
+// CHECK:     ('n_type', 0x1)
+// CHECK:     ('n_sect', 0)
+// CHECK:     ('n_desc', 0)
+// CHECK:     ('n_value', 0)
+// CHECK:     ('_string', 'b')
+// CHECK:    ),
+// CHECK:   ])
+// CHECK:  ),
+// CHECK:   # Load Command 2
+// CHECK:  (('command', 11)
+// CHECK:   ('size', 80)
+// CHECK:   ('ilocalsym', 0)
+// CHECK:   ('nlocalsym', 0)
+// CHECK:   ('iextdefsym', 0)
+// CHECK:   ('nextdefsym', 0)
+// CHECK:   ('iundefsym', 0)
+// CHECK:   ('nundefsym', 2)
+// CHECK:   ('tocoff', 0)
+// CHECK:   ('ntoc', 0)
+// CHECK:   ('modtaboff', 0)
+// CHECK:   ('nmodtab', 0)
+// CHECK:   ('extrefsymoff', 0)
+// CHECK:   ('nextrefsyms', 0)
+// CHECK:   ('indirectsymoff', 0)
+// CHECK:   ('nindirectsyms', 0)
+// CHECK:   ('extreloff', 0)
+// CHECK:   ('nextrel', 0)
+// CHECK:   ('locreloff', 0)
+// CHECK:   ('nlocrel', 0)
+// CHECK:   ('_indirect_symbols', [
+// CHECK:   ])
+// CHECK:  ),
+// CHECK: ])
diff --git a/test/MC/MachO/symbol-flags.s b/test/MC/MachO/symbol-flags.s
index e82b0a0..705fa39 100644
--- a/test/MC/MachO/symbol-flags.s
+++ b/test/MC/MachO/symbol-flags.s
@@ -5,6 +5,10 @@
 sym_ref_def_A:
 sym_ref_def_C:  
         .reference sym_ref_def_C
+        .reference sym_ref_def_D
+        .globl sym_ref_def_D
+        .globl sym_ref_def_E
+        .reference sym_ref_def_E
         
         .weak_reference sym_weak_ref_A
         .weak_reference sym_weak_ref_def_A
@@ -16,12 +20,22 @@ sym_weak_ref_def_B:
         .globl sym_weak_def_A
         .weak_definition sym_weak_def_A        
 sym_weak_def_A:
+sym_weak_def_B:
+        .weak_definition sym_weak_def_B
+        .globl sym_weak_def_B
+        .weak_definition sym_weak_def_C
+sym_weak_def_C:
+        .globl sym_weak_def_C
 
         .lazy_reference sym_lazy_ref_A
         .lazy_reference sym_lazy_ref_B
 sym_lazy_ref_B:
 sym_lazy_ref_C:
         .lazy_reference sym_lazy_ref_C
+        .lazy_reference sym_lazy_ref_D
+        .globl sym_lazy_ref_D
+        .globl sym_lazy_ref_E
+        .lazy_reference sym_lazy_ref_E
 
         .private_extern sym_private_ext_A
         .private_extern sym_private_ext_B
@@ -30,6 +44,8 @@ sym_private_ext_C:
         .private_extern sym_private_ext_C
         .private_extern sym_private_ext_D
         .globl sym_private_ext_D
+        .globl sym_private_ext_E
+        .private_extern sym_private_ext_E
 
         .no_dead_strip sym_no_dead_strip_A
 
@@ -39,7 +55,7 @@ sym_private_ext_C:
 
         .desc sym_desc_flags,0x47
 sym_desc_flags:
-
+        
 // CHECK: ('cputype', 7)
 // CHECK: ('cpusubtype', 3)
 // CHECK: ('filetype', 1)
@@ -73,6 +89,8 @@ sym_desc_flags:
 // CHECK:     ('reserved1', 0)
 // CHECK:     ('reserved2', 0)
 // CHECK:    ),
+// CHECK:   ('_relocations', [
+// CHECK:   ])
 // CHECK:     # Section 1
 // CHECK:    (('section_name', '__data\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
 // CHECK:     ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
@@ -86,19 +104,21 @@ sym_desc_flags:
 // CHECK:     ('reserved1', 0)
 // CHECK:     ('reserved2', 0)
 // CHECK:    ),
+// CHECK:   ('_relocations', [
+// CHECK:   ])
 // CHECK:   ])
 // CHECK:  ),
 // CHECK:   # Load Command 1
 // CHECK:  (('command', 2)
 // CHECK:   ('size', 24)
 // CHECK:   ('symoff', 324)
-// CHECK:   ('nsyms', 16)
-// CHECK:   ('stroff', 516)
-// CHECK:   ('strsize', 260)
-// CHECK:   ('_string_data', '\x00sym_ref_A\x00sym_weak_ref_A\x00sym_weak_def_A\x00sym_lazy_ref_A\x00sym_private_ext_A\x00sym_private_ext_B\x00sym_private_ext_C\x00sym_private_ext_D\x00sym_no_dead_strip_A\x00sym_ref_def_A\x00sym_ref_def_C\x00sym_weak_ref_def_A\x00sym_weak_ref_def_B\x00sym_lazy_ref_B\x00sym_lazy_ref_C\x00sym_desc_flags\x00\x00')
+// CHECK:   ('nsyms', 23)
+// CHECK:   ('stroff', 600)
+// CHECK:   ('strsize', 368)
+// CHECK:   ('_string_data', '\x00sym_ref_A\x00sym_ref_def_D\x00sym_ref_def_E\x00sym_weak_ref_A\x00sym_weak_def_A\x00sym_weak_def_B\x00sym_weak_def_C\x00sym_lazy_ref_A\x00sym_lazy_ref_D\x00sym_lazy_ref_E\x00sym_private_ext_A\x00sym_private_ext_B\x00sym_private_ext_C\x00sym_private_ext_D\x00sym_private_ext_E\x00sym_no_dead_strip_A\x00sym_ref_def_A\x00sym_ref_def_C\x00sym_weak_ref_def_A\x00sym_weak_ref_def_B\x00sym_lazy_ref_B\x00sym_lazy_ref_C\x00sym_desc_flags\x00\x00\x00\x00')
 // CHECK:   ('_symbols', [
 // CHECK:     # Symbol 0
-// CHECK:    (('n_strx', 148)
+// CHECK:    (('n_strx', 254)
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 1)
 // CHECK:     ('n_desc', 32)
@@ -106,7 +126,7 @@ sym_desc_flags:
 // CHECK:     ('_string', 'sym_ref_def_A')
 // CHECK:    ),
 // CHECK:     # Symbol 1
-// CHECK:    (('n_strx', 162)
+// CHECK:    (('n_strx', 268)
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 1)
 // CHECK:     ('n_desc', 32)
@@ -114,15 +134,15 @@ sym_desc_flags:
 // CHECK:     ('_string', 'sym_ref_def_C')
 // CHECK:    ),
 // CHECK:     # Symbol 2
-// CHECK:    (('n_strx', 176)
+// CHECK:    (('n_strx', 282)
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 1)
-// CHECK:     ('n_desc', 0)
+// CHECK:     ('n_desc', 64)
 // CHECK:     ('n_value', 0)
 // CHECK:     ('_string', 'sym_weak_ref_def_A')
 // CHECK:    ),
 // CHECK:     # Symbol 3
-// CHECK:    (('n_strx', 195)
+// CHECK:    (('n_strx', 301)
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 1)
 // CHECK:     ('n_desc', 0)
@@ -130,7 +150,7 @@ sym_desc_flags:
 // CHECK:     ('_string', 'sym_weak_ref_def_B')
 // CHECK:    ),
 // CHECK:     # Symbol 4
-// CHECK:    (('n_strx', 214)
+// CHECK:    (('n_strx', 320)
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 2)
 // CHECK:     ('n_desc', 32)
@@ -138,7 +158,7 @@ sym_desc_flags:
 // CHECK:     ('_string', 'sym_lazy_ref_B')
 // CHECK:    ),
 // CHECK:     # Symbol 5
-// CHECK:    (('n_strx', 229)
+// CHECK:    (('n_strx', 335)
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 2)
 // CHECK:     ('n_desc', 32)
@@ -146,15 +166,15 @@ sym_desc_flags:
 // CHECK:     ('_string', 'sym_lazy_ref_C')
 // CHECK:    ),
 // CHECK:     # Symbol 6
-// CHECK:    (('n_strx', 244)
+// CHECK:    (('n_strx', 350)
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 2)
-// CHECK:     ('n_desc', 0)
+// CHECK:     ('n_desc', 64)
 // CHECK:     ('n_value', 0)
 // CHECK:     ('_string', 'sym_desc_flags')
 // CHECK:    ),
 // CHECK:     # Symbol 7
-// CHECK:    (('n_strx', 74)
+// CHECK:    (('n_strx', 162)
 // CHECK:     ('n_type', 0x1f)
 // CHECK:     ('n_sect', 2)
 // CHECK:     ('n_desc', 0)
@@ -162,7 +182,7 @@ sym_desc_flags:
 // CHECK:     ('_string', 'sym_private_ext_B')
 // CHECK:    ),
 // CHECK:     # Symbol 8
-// CHECK:    (('n_strx', 92)
+// CHECK:    (('n_strx', 180)
 // CHECK:     ('n_type', 0x1f)
 // CHECK:     ('n_sect', 2)
 // CHECK:     ('n_desc', 0)
@@ -170,7 +190,7 @@ sym_desc_flags:
 // CHECK:     ('_string', 'sym_private_ext_C')
 // CHECK:    ),
 // CHECK:     # Symbol 9
-// CHECK:    (('n_strx', 26)
+// CHECK:    (('n_strx', 54)
 // CHECK:     ('n_type', 0xf)
 // CHECK:     ('n_sect', 2)
 // CHECK:     ('n_desc', 128)
@@ -178,38 +198,78 @@ sym_desc_flags:
 // CHECK:     ('_string', 'sym_weak_def_A')
 // CHECK:    ),
 // CHECK:     # Symbol 10
-// CHECK:    (('n_strx', 41)
+// CHECK:    (('n_strx', 69)
+// CHECK:     ('n_type', 0xf)
+// CHECK:     ('n_sect', 2)
+// CHECK:     ('n_desc', 128)
+// CHECK:     ('n_value', 0)
+// CHECK:     ('_string', 'sym_weak_def_B')
+// CHECK:    ),
+// CHECK:     # Symbol 11
+// CHECK:    (('n_strx', 84)
+// CHECK:     ('n_type', 0xf)
+// CHECK:     ('n_sect', 2)
+// CHECK:     ('n_desc', 128)
+// CHECK:     ('n_value', 0)
+// CHECK:     ('_string', 'sym_weak_def_C')
+// CHECK:    ),
+// CHECK:     # Symbol 12
+// CHECK:    (('n_strx', 99)
 // CHECK:     ('n_type', 0x1)
 // CHECK:     ('n_sect', 0)
 // CHECK:     ('n_desc', 33)
 // CHECK:     ('n_value', 0)
 // CHECK:     ('_string', 'sym_lazy_ref_A')
 // CHECK:    ),
-// CHECK:     # Symbol 11
-// CHECK:    (('n_strx', 128)
+// CHECK:     # Symbol 13
+// CHECK:    (('n_strx', 114)
+// CHECK:     ('n_type', 0x1)
+// CHECK:     ('n_sect', 0)
+// CHECK:     ('n_desc', 32)
+// CHECK:     ('n_value', 0)
+// CHECK:     ('_string', 'sym_lazy_ref_D')
+// CHECK:    ),
+// CHECK:     # Symbol 14
+// CHECK:    (('n_strx', 129)
+// CHECK:     ('n_type', 0x1)
+// CHECK:     ('n_sect', 0)
+// CHECK:     ('n_desc', 33)
+// CHECK:     ('n_value', 0)
+// CHECK:     ('_string', 'sym_lazy_ref_E')
+// CHECK:    ),
+// CHECK:     # Symbol 15
+// CHECK:    (('n_strx', 234)
 // CHECK:     ('n_type', 0x1)
 // CHECK:     ('n_sect', 0)
 // CHECK:     ('n_desc', 32)
 // CHECK:     ('n_value', 0)
 // CHECK:     ('_string', 'sym_no_dead_strip_A')
 // CHECK:    ),
-// CHECK:     # Symbol 12
-// CHECK:    (('n_strx', 56)
+// CHECK:     # Symbol 16
+// CHECK:    (('n_strx', 144)
 // CHECK:     ('n_type', 0x11)
 // CHECK:     ('n_sect', 0)
 // CHECK:     ('n_desc', 0)
 // CHECK:     ('n_value', 0)
 // CHECK:     ('_string', 'sym_private_ext_A')
 // CHECK:    ),
-// CHECK:     # Symbol 13
-// CHECK:    (('n_strx', 110)
+// CHECK:     # Symbol 17
+// CHECK:    (('n_strx', 198)
 // CHECK:     ('n_type', 0x11)
 // CHECK:     ('n_sect', 0)
 // CHECK:     ('n_desc', 0)
 // CHECK:     ('n_value', 0)
 // CHECK:     ('_string', 'sym_private_ext_D')
 // CHECK:    ),
-// CHECK:     # Symbol 14
+// CHECK:     # Symbol 18
+// CHECK:    (('n_strx', 216)
+// CHECK:     ('n_type', 0x11)
+// CHECK:     ('n_sect', 0)
+// CHECK:     ('n_desc', 0)
+// CHECK:     ('n_value', 0)
+// CHECK:     ('_string', 'sym_private_ext_E')
+// CHECK:    ),
+// CHECK:     # Symbol 19
 // CHECK:    (('n_strx', 1)
 // CHECK:     ('n_type', 0x1)
 // CHECK:     ('n_sect', 0)
@@ -217,10 +277,26 @@ sym_desc_flags:
 // CHECK:     ('n_value', 0)
 // CHECK:     ('_string', 'sym_ref_A')
 // CHECK:    ),
-// CHECK:     # Symbol 15
+// CHECK:     # Symbol 20
 // CHECK:    (('n_strx', 11)
 // CHECK:     ('n_type', 0x1)
 // CHECK:     ('n_sect', 0)
+// CHECK:     ('n_desc', 32)
+// CHECK:     ('n_value', 0)
+// CHECK:     ('_string', 'sym_ref_def_D')
+// CHECK:    ),
+// CHECK:     # Symbol 21
+// CHECK:    (('n_strx', 25)
+// CHECK:     ('n_type', 0x1)
+// CHECK:     ('n_sect', 0)
+// CHECK:     ('n_desc', 32)
+// CHECK:     ('n_value', 0)
+// CHECK:     ('_string', 'sym_ref_def_E')
+// CHECK:    ),
+// CHECK:     # Symbol 22
+// CHECK:    (('n_strx', 39)
+// CHECK:     ('n_type', 0x1)
+// CHECK:     ('n_sect', 0)
 // CHECK:     ('n_desc', 64)
 // CHECK:     ('n_value', 0)
 // CHECK:     ('_string', 'sym_weak_ref_A')
@@ -233,9 +309,9 @@ sym_desc_flags:
 // CHECK:   ('ilocalsym', 0)
 // CHECK:   ('nlocalsym', 7)
 // CHECK:   ('iextdefsym', 7)
-// CHECK:   ('nextdefsym', 3)
-// CHECK:   ('iundefsym', 10)
-// CHECK:   ('nundefsym', 6)
+// CHECK:   ('nextdefsym', 5)
+// CHECK:   ('iundefsym', 12)
+// CHECK:   ('nundefsym', 11)
 // CHECK:   ('tocoff', 0)
 // CHECK:   ('ntoc', 0)
 // CHECK:   ('modtaboff', 0)
diff --git a/test/MC/MachO/tbss.s b/test/MC/MachO/tbss.s
new file mode 100644
index 0000000..2131ea4
--- /dev/null
+++ b/test/MC/MachO/tbss.s
@@ -0,0 +1,114 @@
+// RUN: llvm-mc -triple x86_64-apple-darwin %s -filetype=obj -o - | macho-dump --dump-section-data | FileCheck %s
+
+.tbss _a$tlv$init, 4
+.tbss _b$tlv$init, 4, 3
+
+// CHECK: ('cputype', 16777223)
+// CHECK: ('cpusubtype', 3)
+// CHECK: ('filetype', 1)
+// CHECK: ('num_load_commands', 1)
+// CHECK: ('load_commands_size', 336)
+// CHECK: ('flag', 0)
+// CHECK: ('reserved', 0)
+// CHECK: ('load_commands', [
+// CHECK:   # Load Command 0
+// CHECK:  (('command', 25)
+// CHECK:   ('size', 232)
+// CHECK:   ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK:   ('vm_addr', 0)
+// CHECK:   ('vm_size', 12)
+// CHECK:   ('file_offset', 368)
+// CHECK:   ('file_size', 0)
+// CHECK:   ('maxprot', 7)
+// CHECK:   ('initprot', 7)
+// CHECK:   ('num_sections', 2)
+// CHECK:   ('flags', 0)
+// CHECK:   ('sections', [
+// CHECK:     # Section 0
+// CHECK:    (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK:     ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK:     ('address', 0)
+// CHECK:     ('size', 0)
+// CHECK:     ('offset', 368)
+// CHECK:     ('alignment', 0)
+// CHECK:     ('reloc_offset', 0)
+// CHECK:     ('num_reloc', 0)
+// CHECK:     ('flags', 0x80000000)
+// CHECK:     ('reserved1', 0)
+// CHECK:     ('reserved2', 0)
+// CHECK:     ('reserved3', 0)
+// CHECK:    ),
+// CHECK:   ('_relocations', [
+// CHECK:   ])
+// CHECK:   ('_section_data', '')
+// CHECK:     # Section 1
+// CHECK:    (('section_name', '__thread_bss\x00\x00\x00\x00')
+// CHECK:     ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK:     ('address', 0)
+// CHECK:     ('size', 12)
+// CHECK:     ('offset', 0)
+// CHECK:     ('alignment', 3)
+// CHECK:     ('reloc_offset', 0)
+// CHECK:     ('num_reloc', 0)
+// CHECK:     ('flags', 0x12)
+// CHECK:     ('reserved1', 0)
+// CHECK:     ('reserved2', 0)
+// CHECK:     ('reserved3', 0)
+// CHECK:    ),
+// CHECK:   ('_relocations', [
+// CHECK:   ])
+// CHECK:   ('_section_data', '\xcf\xfa\xed\xfe\x07\x00\x00\x01\x03\x00\x00\x00')
+// CHECK:   ])
+// CHECK:  ),
+// CHECK:   # Load Command 1
+// CHECK:  (('command', 2)
+// CHECK:   ('size', 24)
+// CHECK:   ('symoff', 368)
+// CHECK:   ('nsyms', 2)
+// CHECK:   ('stroff', 400)
+// CHECK:   ('strsize', 28)
+// CHECK:   ('_string_data', '\x00_a$tlv$init\x00_b$tlv$init\x00\x00\x00\x00')
+// CHECK:   ('_symbols', [
+// CHECK:     # Symbol 0
+// CHECK:    (('n_strx', 1)
+// CHECK:     ('n_type', 0xe)
+// CHECK:     ('n_sect', 2)
+// CHECK:     ('n_desc', 0)
+// CHECK:     ('n_value', 0)
+// CHECK:     ('_string', '_a$tlv$init')
+// CHECK:    ),
+// CHECK:     # Symbol 1
+// CHECK:    (('n_strx', 13)
+// CHECK:     ('n_type', 0xe)
+// CHECK:     ('n_sect', 2)
+// CHECK:     ('n_desc', 0)
+// CHECK:     ('n_value', 8)
+// CHECK:     ('_string', '_b$tlv$init')
+// CHECK:    ),
+// CHECK:   ])
+// CHECK:  ),
+// CHECK:   # Load Command 2
+// CHECK:  (('command', 11)
+// CHECK:   ('size', 80)
+// CHECK:   ('ilocalsym', 0)
+// CHECK:   ('nlocalsym', 2)
+// CHECK:   ('iextdefsym', 2)
+// CHECK:   ('nextdefsym', 0)
+// CHECK:   ('iundefsym', 2)
+// CHECK:   ('nundefsym', 0)
+// CHECK:   ('tocoff', 0)
+// CHECK:   ('ntoc', 0)
+// CHECK:   ('modtaboff', 0)
+// CHECK:   ('nmodtab', 0)
+// CHECK:   ('extrefsymoff', 0)
+// CHECK:   ('nextrefsyms', 0)
+// CHECK:   ('indirectsymoff', 0)
+// CHECK:   ('nindirectsyms', 0)
+// CHECK:   ('extreloff', 0)
+// CHECK:   ('nextrel', 0)
+// CHECK:   ('locreloff', 0)
+// CHECK:   ('nlocrel', 0)
+// CHECK:   ('_indirect_symbols', [
+// CHECK:   ])
+// CHECK:  ),
+// CHECK: ])
diff --git a/test/MC/MachO/tdata.s b/test/MC/MachO/tdata.s
new file mode 100644
index 0000000..64f88b5
--- /dev/null
+++ b/test/MC/MachO/tdata.s
@@ -0,0 +1,106 @@
+// RUN: llvm-mc -triple x86_64-apple-darwin %s -filetype=obj -o - | macho-dump --dump-section-data | FileCheck %s
+.tdata
+_a$tlv$init:
+	.long 4
+
+// CHECK: ('cputype', 16777223)
+// CHECK: ('cpusubtype', 3)
+// CHECK: ('filetype', 1)
+// CHECK: ('num_load_commands', 1)
+// CHECK: ('load_commands_size', 336)
+// CHECK: ('flag', 0)
+// CHECK: ('reserved', 0)
+// CHECK: ('load_commands', [
+// CHECK:   # Load Command 0
+// CHECK:  (('command', 25)
+// CHECK:   ('size', 232)
+// CHECK:   ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK:   ('vm_addr', 0)
+// CHECK:   ('vm_size', 4)
+// CHECK:   ('file_offset', 368)
+// CHECK:   ('file_size', 4)
+// CHECK:   ('maxprot', 7)
+// CHECK:   ('initprot', 7)
+// CHECK:   ('num_sections', 2)
+// CHECK:   ('flags', 0)
+// CHECK:   ('sections', [
+// CHECK:     # Section 0
+// CHECK:    (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK:     ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK:     ('address', 0)
+// CHECK:     ('size', 0)
+// CHECK:     ('offset', 368)
+// CHECK:     ('alignment', 0)
+// CHECK:     ('reloc_offset', 0)
+// CHECK:     ('num_reloc', 0)
+// CHECK:     ('flags', 0x80000000)
+// CHECK:     ('reserved1', 0)
+// CHECK:     ('reserved2', 0)
+// CHECK:     ('reserved3', 0)
+// CHECK:    ),
+// CHECK:   ('_relocations', [
+// CHECK:   ])
+// CHECK:   ('_section_data', '')
+// CHECK:     # Section 1
+// CHECK:    (('section_name', '__thread_data\x00\x00\x00')
+// CHECK:     ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK:     ('address', 0)
+// CHECK:     ('size', 4)
+// CHECK:     ('offset', 368)
+// CHECK:     ('alignment', 0)
+// CHECK:     ('reloc_offset', 0)
+// CHECK:     ('num_reloc', 0)
+// CHECK:     ('flags', 0x11)
+// CHECK:     ('reserved1', 0)
+// CHECK:     ('reserved2', 0)
+// CHECK:     ('reserved3', 0)
+// CHECK:    ),
+// CHECK:   ('_relocations', [
+// CHECK:   ])
+// CHECK:   ('_section_data', '\x04\x00\x00\x00')
+// CHECK:   ])
+// CHECK:  ),
+// CHECK:   # Load Command 1
+// CHECK:  (('command', 2)
+// CHECK:   ('size', 24)
+// CHECK:   ('symoff', 372)
+// CHECK:   ('nsyms', 1)
+// CHECK:   ('stroff', 388)
+// CHECK:   ('strsize', 16)
+// CHECK:   ('_string_data', '\x00_a$tlv$init\x00\x00\x00\x00')
+// CHECK:   ('_symbols', [
+// CHECK:     # Symbol 0
+// CHECK:    (('n_strx', 1)
+// CHECK:     ('n_type', 0xe)
+// CHECK:     ('n_sect', 2)
+// CHECK:     ('n_desc', 0)
+// CHECK:     ('n_value', 0)
+// CHECK:     ('_string', '_a$tlv$init')
+// CHECK:    ),
+// CHECK:   ])
+// CHECK:  ),
+// CHECK:   # Load Command 2
+// CHECK:  (('command', 11)
+// CHECK:   ('size', 80)
+// CHECK:   ('ilocalsym', 0)
+// CHECK:   ('nlocalsym', 1)
+// CHECK:   ('iextdefsym', 1)
+// CHECK:   ('nextdefsym', 0)
+// CHECK:   ('iundefsym', 1)
+// CHECK:   ('nundefsym', 0)
+// CHECK:   ('tocoff', 0)
+// CHECK:   ('ntoc', 0)
+// CHECK:   ('modtaboff', 0)
+// CHECK:   ('nmodtab', 0)
+// CHECK:   ('extrefsymoff', 0)
+// CHECK:   ('nextrefsyms', 0)
+// CHECK:   ('indirectsymoff', 0)
+// CHECK:   ('nindirectsyms', 0)
+// CHECK:   ('extreloff', 0)
+// CHECK:   ('nextrel', 0)
+// CHECK:   ('locreloff', 0)
+// CHECK:   ('nlocrel', 0)
+// CHECK:   ('_indirect_symbols', [
+// CHECK:   ])
+// CHECK:  ),
+// CHECK: ])
diff --git a/test/MC/MachO/thread_init_func.s b/test/MC/MachO/thread_init_func.s
new file mode 100644
index 0000000..eeab6e1
--- /dev/null
+++ b/test/MC/MachO/thread_init_func.s
@@ -0,0 +1,63 @@
+// RUN: llvm-mc -triple x86_64-apple-darwin %s -filetype=obj -o - | macho-dump	--dump-section-data | FileCheck %s
+
+	.thread_init_func
+	.quad 0
+
+// CHECK: ('cputype', 16777223)
+// CHECK: ('cpusubtype', 3)
+// CHECK: ('filetype', 1)
+// CHECK: ('num_load_commands', 1)
+// CHECK: ('load_commands_size', 232)
+// CHECK: ('flag', 0)
+// CHECK: ('reserved', 0)
+// CHECK: ('load_commands', [
+// CHECK:   # Load Command 0
+// CHECK:  (('command', 25)
+// CHECK:   ('size', 232)
+// CHECK:   ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK:   ('vm_addr', 0)
+// CHECK:   ('vm_size', 8)
+// CHECK:   ('file_offset', 264)
+// CHECK:   ('file_size', 8)
+// CHECK:   ('maxprot', 7)
+// CHECK:   ('initprot', 7)
+// CHECK:   ('num_sections', 2)
+// CHECK:   ('flags', 0)
+// CHECK:   ('sections', [
+// CHECK:     # Section 0
+// CHECK:    (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK:     ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK:     ('address', 0)
+// CHECK:     ('size', 0)
+// CHECK:     ('offset', 264)
+// CHECK:     ('alignment', 0)
+// CHECK:     ('reloc_offset', 0)
+// CHECK:     ('num_reloc', 0)
+// CHECK:     ('flags', 0x80000000)
+// CHECK:     ('reserved1', 0)
+// CHECK:     ('reserved2', 0)
+// CHECK:     ('reserved3', 0)
+// CHECK:    ),
+// CHECK:   ('_relocations', [
+// CHECK:   ])
+// CHECK:   ('_section_data', '')
+// CHECK:     # Section 1
+// CHECK:    (('section_name', '__thread_init\x00\x00\x00')
+// CHECK:     ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK:     ('address', 0)
+// CHECK:     ('size', 8)
+// CHECK:     ('offset', 264)
+// CHECK:     ('alignment', 0)
+// CHECK:     ('reloc_offset', 0)
+// CHECK:     ('num_reloc', 0)
+// CHECK:     ('flags', 0x15)
+// CHECK:     ('reserved1', 0)
+// CHECK:     ('reserved2', 0)
+// CHECK:     ('reserved3', 0)
+// CHECK:    ),
+// CHECK:   ('_relocations', [
+// CHECK:   ])
+// CHECK:   ('_section_data', '\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK:   ])
+// CHECK:  ),
+// CHECK: ])
diff --git a/test/MC/MachO/tls.s b/test/MC/MachO/tls.s
new file mode 100644
index 0000000..07ea0aa
--- /dev/null
+++ b/test/MC/MachO/tls.s
@@ -0,0 +1,270 @@
+// RUN: llvm-mc -triple x86_64-apple-darwin %s -filetype=obj -o - | macho-dump --dump-section-data | FileCheck %s
+
+        .section        __TEXT,__text,regular,pure_instructions
+        .section        __DATA,__thread_data,thread_local_regular
+        .globl  _c$tlv$init
+        .align  2
+_c$tlv$init:
+        .long   4
+
+        .section        __DATA,__thread_vars,thread_local_variables
+        .globl  _c
+_c:
+        .quad   ___tlv_bootstrap
+        .quad   0
+        .quad   _c$tlv$init
+
+        .section        __DATA,__thread_data,thread_local_regular
+        .globl  _d$tlv$init
+        .align  2
+_d$tlv$init:
+        .long   5
+
+        .section        __DATA,__thread_vars,thread_local_variables
+        .globl  _d
+_d:
+        .quad   ___tlv_bootstrap
+        .quad   0
+        .quad   _d$tlv$init
+
+.tbss _a$tlv$init, 4, 2
+
+        .globl  _a
+_a:
+        .quad   ___tlv_bootstrap
+        .quad   0
+        .quad   _a$tlv$init
+
+.tbss _b$tlv$init, 4, 2
+
+        .globl  _b
+_b:
+        .quad   ___tlv_bootstrap
+        .quad   0
+        .quad   _b$tlv$init
+
+.subsections_via_symbols
+
+// CHECK: ('cputype', 16777223)
+// CHECK: ('cpusubtype', 3)
+// CHECK: ('filetype', 1)
+// CHECK: ('num_load_commands', 1)
+// CHECK: ('load_commands_size', 496)
+// CHECK: ('flag', 8192)
+// CHECK: ('reserved', 0)
+// CHECK: ('load_commands', [
+// CHECK:   # Load Command 0
+// CHECK:  (('command', 25)
+// CHECK:   ('size', 392)
+// CHECK:   ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK:   ('vm_addr', 0)
+// CHECK:   ('vm_size', 112)
+// CHECK:   ('file_offset', 528)
+// CHECK:   ('file_size', 104)
+// CHECK:   ('maxprot', 7)
+// CHECK:   ('initprot', 7)
+// CHECK:   ('num_sections', 4)
+// CHECK:   ('flags', 0)
+// CHECK:   ('sections', [
+// CHECK:     # Section 0
+// CHECK:    (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK:     ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK:     ('address', 0)
+// CHECK:     ('size', 0)
+// CHECK:     ('offset', 528)
+// CHECK:     ('alignment', 0)
+// CHECK:     ('reloc_offset', 0)
+// CHECK:     ('num_reloc', 0)
+// CHECK:     ('flags', 0x80000000)
+// CHECK:     ('reserved1', 0)
+// CHECK:     ('reserved2', 0)
+// CHECK:     ('reserved3', 0)
+// CHECK:    ),
+// CHECK:   ('_relocations', [
+// CHECK:   ])
+// CHECK:   ('_section_data', '')
+// CHECK:     # Section 1
+// CHECK:    (('section_name', '__thread_data\x00\x00\x00')
+// CHECK:     ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK:     ('address', 0)
+// CHECK:     ('size', 8)
+// CHECK:     ('offset', 528)
+// CHECK:     ('alignment', 2)
+// CHECK:     ('reloc_offset', 0)
+// CHECK:     ('num_reloc', 0)
+// CHECK:     ('flags', 0x11)
+// CHECK:     ('reserved1', 0)
+// CHECK:     ('reserved2', 0)
+// CHECK:     ('reserved3', 0)
+// CHECK:    ),
+// CHECK:   ('_relocations', [
+// CHECK:   ])
+// CHECK:   ('_section_data', '\x04\x00\x00\x00\x05\x00\x00\x00')
+// CHECK:     # Section 2
+// CHECK:    (('section_name', '__thread_vars\x00\x00\x00')
+// CHECK:     ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK:     ('address', 8)
+// CHECK:     ('size', 96)
+// CHECK:     ('offset', 536)
+// CHECK:     ('alignment', 0)
+// CHECK:     ('reloc_offset', 632)
+// CHECK:     ('num_reloc', 8)
+// CHECK:     ('flags', 0x13)
+// CHECK:     ('reserved1', 0)
+// CHECK:     ('reserved2', 0)
+// CHECK:     ('reserved3', 0)
+// CHECK:    ),
+// CHECK:   ('_relocations', [
+// CHECK:     # Relocation 0
+// CHECK:     (('word-0', 0x58),
+// CHECK:      ('word-1', 0xe000001)),
+// CHECK:     # Relocation 1
+// CHECK:     (('word-0', 0x48),
+// CHECK:      ('word-1', 0xe000008)),
+// CHECK:     # Relocation 2
+// CHECK:     (('word-0', 0x40),
+// CHECK:      ('word-1', 0xe000000)),
+// CHECK:     # Relocation 3
+// CHECK:     (('word-0', 0x30),
+// CHECK:      ('word-1', 0xe000008)),
+// CHECK:     # Relocation 4
+// CHECK:     (('word-0', 0x28),
+// CHECK:      ('word-1', 0xe000007)),
+// CHECK:     # Relocation 5
+// CHECK:     (('word-0', 0x18),
+// CHECK:      ('word-1', 0xe000008)),
+// CHECK:     # Relocation 6
+// CHECK:     (('word-0', 0x10),
+// CHECK:      ('word-1', 0xe000005)),
+// CHECK:     # Relocation 7
+// CHECK:     (('word-0', 0x0),
+// CHECK:      ('word-1', 0xe000008)),
+// CHECK:   ])
+// CHECK:   ('_section_data', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK:     # Section 3
+// CHECK:    (('section_name', '__thread_bss\x00\x00\x00\x00')
+// CHECK:     ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK:     ('address', 104)
+// CHECK:     ('size', 8)
+// CHECK:     ('offset', 0)
+// CHECK:     ('alignment', 2)
+// CHECK:     ('reloc_offset', 0)
+// CHECK:     ('num_reloc', 0)
+// CHECK:     ('flags', 0x12)
+// CHECK:     ('reserved1', 0)
+// CHECK:     ('reserved2', 0)
+// CHECK:     ('reserved3', 0)
+// CHECK:    ),
+// CHECK:   ('_relocations', [
+// CHECK:   ])
+// CHECK:   ('_section_data', '\xcf\xfa\xed\xfe\x07\x00\x00\x01')
+// CHECK:   ])
+// CHECK:  ),
+// CHECK:   # Load Command 1
+// CHECK:  (('command', 2)
+// CHECK:   ('size', 24)
+// CHECK:   ('symoff', 696)
+// CHECK:   ('nsyms', 9)
+// CHECK:   ('stroff', 840)
+// CHECK:   ('strsize', 80)
+// CHECK:   ('_string_data', '\x00_c$tlv$init\x00_c\x00___tlv_bootstrap\x00_d$tlv$init\x00_d\x00_a\x00_b\x00_a$tlv$init\x00_b$tlv$init\x00\x00\x00')
+// CHECK:   ('_symbols', [
+// CHECK:     # Symbol 0
+// CHECK:    (('n_strx', 54)
+// CHECK:     ('n_type', 0xe)
+// CHECK:     ('n_sect', 4)
+// CHECK:     ('n_desc', 0)
+// CHECK:     ('n_value', 104)
+// CHECK:     ('_string', '_a$tlv$init')
+// CHECK:    ),
+// CHECK:     # Symbol 1
+// CHECK:    (('n_strx', 66)
+// CHECK:     ('n_type', 0xe)
+// CHECK:     ('n_sect', 4)
+// CHECK:     ('n_desc', 0)
+// CHECK:     ('n_value', 108)
+// CHECK:     ('_string', '_b$tlv$init')
+// CHECK:    ),
+// CHECK:     # Symbol 2
+// CHECK:    (('n_strx', 48)
+// CHECK:     ('n_type', 0xf)
+// CHECK:     ('n_sect', 3)
+// CHECK:     ('n_desc', 0)
+// CHECK:     ('n_value', 56)
+// CHECK:     ('_string', '_a')
+// CHECK:    ),
+// CHECK:     # Symbol 3
+// CHECK:    (('n_strx', 51)
+// CHECK:     ('n_type', 0xf)
+// CHECK:     ('n_sect', 3)
+// CHECK:     ('n_desc', 0)
+// CHECK:     ('n_value', 80)
+// CHECK:     ('_string', '_b')
+// CHECK:    ),
+// CHECK:     # Symbol 4
+// CHECK:    (('n_strx', 13)
+// CHECK:     ('n_type', 0xf)
+// CHECK:     ('n_sect', 3)
+// CHECK:     ('n_desc', 0)
+// CHECK:     ('n_value', 8)
+// CHECK:     ('_string', '_c')
+// CHECK:    ),
+// CHECK:     # Symbol 5
+// CHECK:    (('n_strx', 1)
+// CHECK:     ('n_type', 0xf)
+// CHECK:     ('n_sect', 2)
+// CHECK:     ('n_desc', 0)
+// CHECK:     ('n_value', 0)
+// CHECK:     ('_string', '_c$tlv$init')
+// CHECK:    ),
+// CHECK:     # Symbol 6
+// CHECK:    (('n_strx', 45)
+// CHECK:     ('n_type', 0xf)
+// CHECK:     ('n_sect', 3)
+// CHECK:     ('n_desc', 0)
+// CHECK:     ('n_value', 32)
+// CHECK:     ('_string', '_d')
+// CHECK:    ),
+// CHECK:     # Symbol 7
+// CHECK:    (('n_strx', 33)
+// CHECK:     ('n_type', 0xf)
+// CHECK:     ('n_sect', 2)
+// CHECK:     ('n_desc', 0)
+// CHECK:     ('n_value', 4)
+// CHECK:     ('_string', '_d$tlv$init')
+// CHECK:    ),
+// CHECK:     # Symbol 8
+// CHECK:    (('n_strx', 16)
+// CHECK:     ('n_type', 0x1)
+// CHECK:     ('n_sect', 0)
+// CHECK:     ('n_desc', 0)
+// CHECK:     ('n_value', 0)
+// CHECK:     ('_string', '___tlv_bootstrap')
+// CHECK:    ),
+// CHECK:   ])
+// CHECK:  ),
+// CHECK:   # Load Command 2
+// CHECK:  (('command', 11)
+// CHECK:   ('size', 80)
+// CHECK:   ('ilocalsym', 0)
+// CHECK:   ('nlocalsym', 2)
+// CHECK:   ('iextdefsym', 2)
+// CHECK:   ('nextdefsym', 6)
+// CHECK:   ('iundefsym', 8)
+// CHECK:   ('nundefsym', 1)
+// CHECK:   ('tocoff', 0)
+// CHECK:   ('ntoc', 0)
+// CHECK:   ('modtaboff', 0)
+// CHECK:   ('nmodtab', 0)
+// CHECK:   ('extrefsymoff', 0)
+// CHECK:   ('nextrefsyms', 0)
+// CHECK:   ('indirectsymoff', 0)
+// CHECK:   ('nindirectsyms', 0)
+// CHECK:   ('extreloff', 0)
+// CHECK:   ('nextrel', 0)
+// CHECK:   ('locreloff', 0)
+// CHECK:   ('nlocrel', 0)
+// CHECK:   ('_indirect_symbols', [
+// CHECK:   ])
+// CHECK:  ),
+// CHECK: ])
diff --git a/test/MC/MachO/tlv-reloc.s b/test/MC/MachO/tlv-reloc.s
new file mode 100644
index 0000000..04fc7ae
--- /dev/null
+++ b/test/MC/MachO/tlv-reloc.s
@@ -0,0 +1,174 @@
+// RUN: llvm-mc -triple x86_64-apple-darwin %s -filetype=obj -o - | macho-dump --dump-section-data | FileCheck %s
+
+.tdata
+_a$tlv$init:
+	.long 4
+
+
+.tlv
+	.globl _a
+_a:
+	.quad __tlv_bootstrap
+	.quad 0
+	.quad _a$tlv$init
+
+.text
+	.globl _foo
+	.align 4, 0x90
+
+_foo:
+	 movq   _a@TLVP(%rip), %rdi
+	 call	*(%rdi) # returns &a in %rax
+	 ret
+
+// CHECK: ('cputype', 16777223)
+// CHECK: ('cpusubtype', 3)
+// CHECK: ('filetype', 1)
+// CHECK: ('num_load_commands', 1)
+// CHECK: ('load_commands_size', 416)
+// CHECK: ('flag', 0)
+// CHECK: ('reserved', 0)
+// CHECK: ('load_commands', [
+// CHECK:   # Load Command 0
+// CHECK:  (('command', 25)
+// CHECK:   ('size', 312)
+// CHECK:   ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK:   ('vm_addr', 0)
+// CHECK:   ('vm_size', 38)
+// CHECK:   ('file_offset', 448)
+// CHECK:   ('file_size', 38)
+// CHECK:   ('maxprot', 7)
+// CHECK:   ('initprot', 7)
+// CHECK:   ('num_sections', 3)
+// CHECK:   ('flags', 0)
+// CHECK:   ('sections', [
+// CHECK:     # Section 0
+// CHECK:    (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK:     ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK:     ('address', 0)
+// CHECK:     ('size', 10)
+// CHECK:     ('offset', 448)
+// CHECK:     ('alignment', 4)
+// CHECK:     ('reloc_offset', 488)
+// CHECK:     ('num_reloc', 1)
+// CHECK:     ('flags', 0x80000400)
+// CHECK:     ('reserved1', 0)
+// CHECK:     ('reserved2', 0)
+// CHECK:     ('reserved3', 0)
+// CHECK:    ),
+// CHECK:   ('_relocations', [
+// CHECK:     # Relocation 0
+// CHECK:     (('word-0', 0x3),
+// CHECK:      ('word-1', 0x9d000001)),
+// CHECK:   ])
+// CHECK:   ('_section_data', 'H\x8b=\x00\x00\x00\x00\xff\x17\xc3')
+// CHECK:     # Section 1
+// CHECK:    (('section_name', '__thread_data\x00\x00\x00')
+// CHECK:     ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK:     ('address', 10)
+// CHECK:     ('size', 4)
+// CHECK:     ('offset', 458)
+// CHECK:     ('alignment', 0)
+// CHECK:     ('reloc_offset', 0)
+// CHECK:     ('num_reloc', 0)
+// CHECK:     ('flags', 0x11)
+// CHECK:     ('reserved1', 0)
+// CHECK:     ('reserved2', 0)
+// CHECK:     ('reserved3', 0)
+// CHECK:    ),
+// CHECK:   ('_relocations', [
+// CHECK:   ])
+// CHECK:   ('_section_data', '\x04\x00\x00\x00')
+// CHECK:     # Section 2
+// CHECK:    (('section_name', '__thread_vars\x00\x00\x00')
+// CHECK:     ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK:     ('address', 14)
+// CHECK:     ('size', 24)
+// CHECK:     ('offset', 462)
+// CHECK:     ('alignment', 0)
+// CHECK:     ('reloc_offset', 496)
+// CHECK:     ('num_reloc', 2)
+// CHECK:     ('flags', 0x13)
+// CHECK:     ('reserved1', 0)
+// CHECK:     ('reserved2', 0)
+// CHECK:     ('reserved3', 0)
+// CHECK:    ),
+// CHECK:   ('_relocations', [
+// CHECK:     # Relocation 0
+// CHECK:     (('word-0', 0x10),
+// CHECK:      ('word-1', 0xe000000)),
+// CHECK:     # Relocation 1
+// CHECK:     (('word-0', 0x0),
+// CHECK:      ('word-1', 0xe000003)),
+// CHECK:   ])
+// CHECK:   ('_section_data', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK:   ])
+// CHECK:  ),
+// CHECK:   # Load Command 1
+// CHECK:  (('command', 2)
+// CHECK:   ('size', 24)
+// CHECK:   ('symoff', 512)
+// CHECK:   ('nsyms', 4)
+// CHECK:   ('stroff', 576)
+// CHECK:   ('strsize', 40)
+// CHECK:   ('_string_data', '\x00_a\x00__tlv_bootstrap\x00_foo\x00_a$tlv$init\x00\x00\x00\x00')
+// CHECK:   ('_symbols', [
+// CHECK:     # Symbol 0
+// CHECK:    (('n_strx', 25)
+// CHECK:     ('n_type', 0xe)
+// CHECK:     ('n_sect', 2)
+// CHECK:     ('n_desc', 0)
+// CHECK:     ('n_value', 10)
+// CHECK:     ('_string', '_a$tlv$init')
+// CHECK:    ),
+// CHECK:     # Symbol 1
+// CHECK:    (('n_strx', 1)
+// CHECK:     ('n_type', 0xf)
+// CHECK:     ('n_sect', 3)
+// CHECK:     ('n_desc', 0)
+// CHECK:     ('n_value', 14)
+// CHECK:     ('_string', '_a')
+// CHECK:    ),
+// CHECK:     # Symbol 2
+// CHECK:    (('n_strx', 20)
+// CHECK:     ('n_type', 0xf)
+// CHECK:     ('n_sect', 1)
+// CHECK:     ('n_desc', 0)
+// CHECK:     ('n_value', 0)
+// CHECK:     ('_string', '_foo')
+// CHECK:    ),
+// CHECK:     # Symbol 3
+// CHECK:    (('n_strx', 4)
+// CHECK:     ('n_type', 0x1)
+// CHECK:     ('n_sect', 0)
+// CHECK:     ('n_desc', 0)
+// CHECK:     ('n_value', 0)
+// CHECK:     ('_string', '__tlv_bootstrap')
+// CHECK:    ),
+// CHECK:   ])
+// CHECK:  ),
+// CHECK:   # Load Command 2
+// CHECK:  (('command', 11)
+// CHECK:   ('size', 80)
+// CHECK:   ('ilocalsym', 0)
+// CHECK:   ('nlocalsym', 1)
+// CHECK:   ('iextdefsym', 1)
+// CHECK:   ('nextdefsym', 2)
+// CHECK:   ('iundefsym', 3)
+// CHECK:   ('nundefsym', 1)
+// CHECK:   ('tocoff', 0)
+// CHECK:   ('ntoc', 0)
+// CHECK:   ('modtaboff', 0)
+// CHECK:   ('nmodtab', 0)
+// CHECK:   ('extrefsymoff', 0)
+// CHECK:   ('nextrefsyms', 0)
+// CHECK:   ('indirectsymoff', 0)
+// CHECK:   ('nindirectsyms', 0)
+// CHECK:   ('extreloff', 0)
+// CHECK:   ('nextrel', 0)
+// CHECK:   ('locreloff', 0)
+// CHECK:   ('nlocrel', 0)
+// CHECK:   ('_indirect_symbols', [
+// CHECK:   ])
+// CHECK:  ),
+// CHECK: ])
diff --git a/test/MC/MachO/tlv.s b/test/MC/MachO/tlv.s
new file mode 100644
index 0000000..7dd7390
--- /dev/null
+++ b/test/MC/MachO/tlv.s
@@ -0,0 +1,110 @@
+// RUN: llvm-mc -triple x86_64-apple-darwin %s -filetype=obj -o - | macho-dump --dump-section-data | FileCheck %s
+
+	.tlv
+	.globl _a
+_a:
+	.quad 0
+	.quad 0
+	.quad 0
+
+// CHECK: ('cputype', 16777223)
+// CHECK: ('cpusubtype', 3)
+// CHECK: ('filetype', 1)
+// CHECK: ('num_load_commands', 1)
+// CHECK: ('load_commands_size', 336)
+// CHECK: ('flag', 0)
+// CHECK: ('reserved', 0)
+// CHECK: ('load_commands', [
+// CHECK:   # Load Command 0
+// CHECK:  (('command', 25)
+// CHECK:   ('size', 232)
+// CHECK:   ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK:   ('vm_addr', 0)
+// CHECK:   ('vm_size', 24)
+// CHECK:   ('file_offset', 368)
+// CHECK:   ('file_size', 24)
+// CHECK:   ('maxprot', 7)
+// CHECK:   ('initprot', 7)
+// CHECK:   ('num_sections', 2)
+// CHECK:   ('flags', 0)
+// CHECK:   ('sections', [
+// CHECK:     # Section 0
+// CHECK:    (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK:     ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK:     ('address', 0)
+// CHECK:     ('size', 0)
+// CHECK:     ('offset', 368)
+// CHECK:     ('alignment', 0)
+// CHECK:     ('reloc_offset', 0)
+// CHECK:     ('num_reloc', 0)
+// CHECK:     ('flags', 0x80000000)
+// CHECK:     ('reserved1', 0)
+// CHECK:     ('reserved2', 0)
+// CHECK:     ('reserved3', 0)
+// CHECK:    ),
+// CHECK:   ('_relocations', [
+// CHECK:   ])
+// CHECK:   ('_section_data', '')
+// CHECK:     # Section 1
+// CHECK:    (('section_name', '__thread_vars\x00\x00\x00')
+// CHECK:     ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK:     ('address', 0)
+// CHECK:     ('size', 24)
+// CHECK:     ('offset', 368)
+// CHECK:     ('alignment', 0)
+// CHECK:     ('reloc_offset', 0)
+// CHECK:     ('num_reloc', 0)
+// CHECK:     ('flags', 0x13)
+// CHECK:     ('reserved1', 0)
+// CHECK:     ('reserved2', 0)
+// CHECK:     ('reserved3', 0)
+// CHECK:    ),
+// CHECK:   ('_relocations', [
+// CHECK:   ])
+// CHECK:   ('_section_data', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK:   ])
+// CHECK:  ),
+// CHECK:   # Load Command 1
+// CHECK:  (('command', 2)
+// CHECK:   ('size', 24)
+// CHECK:   ('symoff', 392)
+// CHECK:   ('nsyms', 1)
+// CHECK:   ('stroff', 408)
+// CHECK:   ('strsize', 4)
+// CHECK:   ('_string_data', '\x00_a\x00')
+// CHECK:   ('_symbols', [
+// CHECK:     # Symbol 0
+// CHECK:    (('n_strx', 1)
+// CHECK:     ('n_type', 0xf)
+// CHECK:     ('n_sect', 2)
+// CHECK:     ('n_desc', 0)
+// CHECK:     ('n_value', 0)
+// CHECK:     ('_string', '_a')
+// CHECK:    ),
+// CHECK:   ])
+// CHECK:  ),
+// CHECK:   # Load Command 2
+// CHECK:  (('command', 11)
+// CHECK:   ('size', 80)
+// CHECK:   ('ilocalsym', 0)
+// CHECK:   ('nlocalsym', 0)
+// CHECK:   ('iextdefsym', 0)
+// CHECK:   ('nextdefsym', 1)
+// CHECK:   ('iundefsym', 1)
+// CHECK:   ('nundefsym', 0)
+// CHECK:   ('tocoff', 0)
+// CHECK:   ('ntoc', 0)
+// CHECK:   ('modtaboff', 0)
+// CHECK:   ('nmodtab', 0)
+// CHECK:   ('extrefsymoff', 0)
+// CHECK:   ('nextrefsyms', 0)
+// CHECK:   ('indirectsymoff', 0)
+// CHECK:   ('nindirectsyms', 0)
+// CHECK:   ('extreloff', 0)
+// CHECK:   ('nextrel', 0)
+// CHECK:   ('locreloff', 0)
+// CHECK:   ('nlocrel', 0)
+// CHECK:   ('_indirect_symbols', [
+// CHECK:   ])
+// CHECK:  ),
+// CHECK: ])
diff --git a/test/MC/MachO/x86_32-optimal_nop.s b/test/MC/MachO/x86_32-optimal_nop.s
index d21d143..e6d41d7 100644
--- a/test/MC/MachO/x86_32-optimal_nop.s
+++ b/test/MC/MachO/x86_32-optimal_nop.s
@@ -150,24 +150,31 @@
         .align 4, 0x90
         ret
 
+        # Only the .text sections gets optimal nops.
+	.section	__TEXT,__const
+f0:
+        .byte 0
+	.align	4, 0x90
+        .long 0
+
 // CHECK: ('cputype', 7)
 // CHECK: ('cpusubtype', 3)
 // CHECK: ('filetype', 1)
 // CHECK: ('num_load_commands', 1)
-// CHECK: ('load_commands_size', 124)
+// CHECK: ('load_commands_size', 296)
 // CHECK: ('flag', 0)
 // CHECK: ('load_commands', [
 // CHECK:   # Load Command 0
 // CHECK:  (('command', 1)
-// CHECK:   ('size', 124)
+// CHECK:   ('size', 192)
 // CHECK:   ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
 // CHECK:   ('vm_addr', 0)
-// CHECK:   ('vm_size', 337)
-// CHECK:   ('file_offset', 152)
-// CHECK:   ('file_size', 337)
+// CHECK:   ('vm_size', 372)
+// CHECK:   ('file_offset', 324)
+// CHECK:   ('file_size', 372)
 // CHECK:   ('maxprot', 7)
 // CHECK:   ('initprot', 7)
-// CHECK:   ('num_sections', 1)
+// CHECK:   ('num_sections', 2)
 // CHECK:   ('flags', 0)
 // CHECK:   ('sections', [
 // CHECK:     # Section 0
@@ -175,7 +182,7 @@
 // CHECK:     ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
 // CHECK:     ('address', 0)
 // CHECK:     ('size', 337)
-// CHECK:     ('offset', 152)
+// CHECK:     ('offset', 324)
 // CHECK:     ('alignment', 4)
 // CHECK:     ('reloc_offset', 0)
 // CHECK:     ('num_reloc', 0)
@@ -186,6 +193,65 @@
 // CHECK:   ('_relocations', [
 // CHECK:   ])
 // CHECK:   ('_section_data', '\xc3\x90\xc3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc3\xc3f\x90\xc3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc3\x0f\x1f\x00\xc3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc3\xc3\xc3\xc3\x0f\x1f@\x00\xc3\x00\x00\x00\x00\x00\x00\x00\xc3\xc3\xc3\x0f\x1fD\x00\x00\xc3\x00\x00\x00\x00\x00\x00\x00\xc3\xc3f\x0f\x1fD\x00\x00\xc3\x00\x00\x00\x00\x00\x00\x00\xc3\x0f\x1f\x80\x00\x00\x00\x00\xc3\x00\x00\x00\x00\x00\x00\x00\xc3\xc3\xc3\xc3\xc3\xc3\xc3\xc3\xc3\x00\x00\x00\x00\x00\x00\x00\xc3\xc3\xc3\xc3\xc3\xc3\xc3f\x0f\x1f\x84\x00\x00\x00\x00\x00\xc3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc3\xc3\xc3\xc3\xc3\xc3\xc3f\x0f\x1f\x84\x00\x00\x00\x00\x00\xc3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc3\xc3\xc3\xc3\xc3\x0f\x1fD\x00\x00f\x0f\x1fD\x00\x00\xc3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc3\xc3\xc3\xc3f\x0f\x1fD\x00\x00f\x0f\x1fD\x00\x00\xc3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc3\xc3\xc3f\x0f\x1fD\x00\x00\x0f\x1f\x80\x00\x00\x00\x00\xc3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc3\xc3\x0f\x1f\x80\x00\x00\x00\x00\x0f\x1f\x80\x00\x00\x00\x00\xc3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc3\x0f\x1f\x80\x00\x00\x00\x00\x0f\x1f\x84\x00\x00\x00\x00\x00\xc3')
+// CHECK:     # Section 1
+// CHECK:    (('section_name', '__const\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK:     ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK:     ('address', 352)
+// CHECK:     ('size', 20)
+// CHECK:     ('offset', 676)
+// CHECK:     ('alignment', 4)
+// CHECK:     ('reloc_offset', 0)
+// CHECK:     ('num_reloc', 0)
+// CHECK:     ('flags', 0x0)
+// CHECK:     ('reserved1', 0)
+// CHECK:     ('reserved2', 0)
+// CHECK:    ),
+// CHECK:   ('_relocations', [
+// CHECK:   ])
+// CHECK:   ('_section_data', '\x00\x90\x90\x90\x90\x90\x90\x90\x90\x90\x90\x90\x90\x90\x90\x90\x00\x00\x00\x00')
+// CHECK:   ])
+// CHECK:  ),
+// CHECK:   # Load Command 1
+// CHECK:  (('command', 2)
+// CHECK:   ('size', 24)
+// CHECK:   ('symoff', 696)
+// CHECK:   ('nsyms', 1)
+// CHECK:   ('stroff', 708)
+// CHECK:   ('strsize', 4)
+// CHECK:   ('_string_data', '\x00f0\x00')
+// CHECK:   ('_symbols', [
+// CHECK:     # Symbol 0
+// CHECK:    (('n_strx', 1)
+// CHECK:     ('n_type', 0xe)
+// CHECK:     ('n_sect', 2)
+// CHECK:     ('n_desc', 0)
+// CHECK:     ('n_value', 352)
+// CHECK:     ('_string', 'f0')
+// CHECK:    ),
+// CHECK:   ])
+// CHECK:  ),
+// CHECK:   # Load Command 2
+// CHECK:  (('command', 11)
+// CHECK:   ('size', 80)
+// CHECK:   ('ilocalsym', 0)
+// CHECK:   ('nlocalsym', 1)
+// CHECK:   ('iextdefsym', 1)
+// CHECK:   ('nextdefsym', 0)
+// CHECK:   ('iundefsym', 1)
+// CHECK:   ('nundefsym', 0)
+// CHECK:   ('tocoff', 0)
+// CHECK:   ('ntoc', 0)
+// CHECK:   ('modtaboff', 0)
+// CHECK:   ('nmodtab', 0)
+// CHECK:   ('extrefsymoff', 0)
+// CHECK:   ('nextrefsyms', 0)
+// CHECK:   ('indirectsymoff', 0)
+// CHECK:   ('nindirectsyms', 0)
+// CHECK:   ('extreloff', 0)
+// CHECK:   ('nextrel', 0)
+// CHECK:   ('locreloff', 0)
+// CHECK:   ('nlocrel', 0)
+// CHECK:   ('_indirect_symbols', [
 // CHECK:   ])
 // CHECK:  ),
 // CHECK: ])
diff --git a/test/MC/MachO/zerofill-5.s b/test/MC/MachO/zerofill-5.s
new file mode 100644
index 0000000..3074f60
--- /dev/null
+++ b/test/MC/MachO/zerofill-5.s
@@ -0,0 +1,109 @@
+// RUN: llvm-mc -triple x86_64-apple-darwin9 %s -filetype=obj -o - | macho-dump --dump-section-data | FileCheck %s
+
+	.text
+	.align	3
+	.long	2
+
+        .zerofill __DATA,__bss,_g0,8,3
+
+// CHECK: ('cputype', 16777223)
+// CHECK: ('cpusubtype', 3)
+// CHECK: ('filetype', 1)
+// CHECK: ('num_load_commands', 1)
+// CHECK: ('load_commands_size', 336)
+// CHECK: ('flag', 0)
+// CHECK: ('reserved', 0)
+// CHECK: ('load_commands', [
+// CHECK:   # Load Command 0
+// CHECK:  (('command', 25)
+// CHECK:   ('size', 232)
+// CHECK:   ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK:   ('vm_addr', 0)
+// CHECK:   ('vm_size', 16)
+// CHECK:   ('file_offset', 368)
+// CHECK:   ('file_size', 4)
+// CHECK:   ('maxprot', 7)
+// CHECK:   ('initprot', 7)
+// CHECK:   ('num_sections', 2)
+// CHECK:   ('flags', 0)
+// CHECK:   ('sections', [
+// CHECK:     # Section 0
+// CHECK:    (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK:     ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK:     ('address', 0)
+// CHECK:     ('size', 4)
+// CHECK:     ('offset', 368)
+// CHECK:     ('alignment', 3)
+// CHECK:     ('reloc_offset', 0)
+// CHECK:     ('num_reloc', 0)
+// CHECK:     ('flags', 0x80000000)
+// CHECK:     ('reserved1', 0)
+// CHECK:     ('reserved2', 0)
+// CHECK:     ('reserved3', 0)
+// CHECK:    ),
+// CHECK:   ('_relocations', [
+// CHECK:   ])
+// CHECK:   ('_section_data', '\x02\x00\x00\x00')
+// CHECK:     # Section 1
+// CHECK:    (('section_name', '__bss\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK:     ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK:     ('address', 8)
+// CHECK:     ('size', 8)
+// CHECK:     ('offset', 0)
+// CHECK:     ('alignment', 3)
+// CHECK:     ('reloc_offset', 0)
+// CHECK:     ('num_reloc', 0)
+// CHECK:     ('flags', 0x1)
+// CHECK:     ('reserved1', 0)
+// CHECK:     ('reserved2', 0)
+// CHECK:     ('reserved3', 0)
+// CHECK:    ),
+// CHECK:   ('_relocations', [
+// CHECK:   ])
+// CHECK:   ('_section_data', '\xcf\xfa\xed\xfe\x07\x00\x00\x01')
+// CHECK:   ])
+// CHECK:  ),
+// CHECK:   # Load Command 1
+// CHECK:  (('command', 2)
+// CHECK:   ('size', 24)
+// CHECK:   ('symoff', 372)
+// CHECK:   ('nsyms', 1)
+// CHECK:   ('stroff', 388)
+// CHECK:   ('strsize', 8)
+// CHECK:   ('_string_data', '\x00_g0\x00\x00\x00\x00')
+// CHECK:   ('_symbols', [
+// CHECK:     # Symbol 0
+// CHECK:    (('n_strx', 1)
+// CHECK:     ('n_type', 0xe)
+// CHECK:     ('n_sect', 2)
+// CHECK:     ('n_desc', 0)
+// CHECK:     ('n_value', 8)
+// CHECK:     ('_string', '_g0')
+// CHECK:    ),
+// CHECK:   ])
+// CHECK:  ),
+// CHECK:   # Load Command 2
+// CHECK:  (('command', 11)
+// CHECK:   ('size', 80)
+// CHECK:   ('ilocalsym', 0)
+// CHECK:   ('nlocalsym', 1)
+// CHECK:   ('iextdefsym', 1)
+// CHECK:   ('nextdefsym', 0)
+// CHECK:   ('iundefsym', 1)
+// CHECK:   ('nundefsym', 0)
+// CHECK:   ('tocoff', 0)
+// CHECK:   ('ntoc', 0)
+// CHECK:   ('modtaboff', 0)
+// CHECK:   ('nmodtab', 0)
+// CHECK:   ('extrefsymoff', 0)
+// CHECK:   ('nextrefsyms', 0)
+// CHECK:   ('indirectsymoff', 0)
+// CHECK:   ('nindirectsyms', 0)
+// CHECK:   ('extreloff', 0)
+// CHECK:   ('nextrel', 0)
+// CHECK:   ('locreloff', 0)
+// CHECK:   ('nlocrel', 0)
+// CHECK:   ('_indirect_symbols', [
+// CHECK:   ])
+// CHECK:  ),
+// CHECK: ])
diff --git a/test/Other/2010-05-06-Printer.ll b/test/Other/2010-05-06-Printer.ll
new file mode 100644
index 0000000..17abafa
--- /dev/null
+++ b/test/Other/2010-05-06-Printer.ll
@@ -0,0 +1,6 @@
+; RUN: llc -O2 -print-after-all < %s 2>&1
+
+define void @tester(){
+  ret void
+}
+
diff --git a/test/Other/inline-asm-newline-terminator.ll b/test/Other/inline-asm-newline-terminator.ll
new file mode 100644
index 0000000..f6cc5c1
--- /dev/null
+++ b/test/Other/inline-asm-newline-terminator.ll
@@ -0,0 +1,7 @@
+; RUN: llc -filetype=obj -o - < %s
+; XFAIL: vg_leak
+
+; ModuleID = 't.c'
+target triple = "x86_64-apple-darwin10.0.0"
+
+module asm ".desc _f0, 0x10"
diff --git a/test/Other/lint.ll b/test/Other/lint.ll
index d0db5e4..1f9efe3 100644
--- a/test/Other/lint.ll
+++ b/test/Other/lint.ll
@@ -2,6 +2,7 @@
 target datalayout = "e-p:64:64:64"
 
 declare fastcc void @bar()
+declare void @llvm.stackrestore(i8*)
 
 @CG = constant i32 7
 
@@ -50,6 +51,8 @@ define i32 @foo() noreturn {
   %lb = load i32* bitcast (i8* blockaddress(@foo, %next) to i32*)
 ; CHECK: Call to block address
   call void()* bitcast (i8* blockaddress(@foo, %next) to void()*)()
+; CHECK: Undefined behavior: Null pointer dereference
+  call void @llvm.stackrestore(i8* null)
 
   br label %next
 
@@ -77,8 +80,20 @@ define void @not_vararg(i8* %p) nounwind {
   ret void
 }
 
+; CHECK: Undefined behavior: Branch to non-blockaddress
 define void @use_indbr() {
   indirectbr i8* bitcast (i32()* @foo to i8*), [label %block]
 block:
   unreachable
 }
+
+; CHECK: Undefined behavior: Call with "tail" keyword references alloca or va_arg
+; CHECK: Undefined behavior: Call with "tail" keyword references alloca or va_arg
+declare void @tailcallee(i8*)
+define void @use_tail(i8* %valist) {
+  %t = alloca i8
+  tail call void @tailcallee(i8* %t)
+  %s = va_arg i8* %valist, i8*
+  tail call void @tailcallee(i8* %s)
+  ret void
+}
diff --git a/test/Transforms/GVN/2010-05-08-OneBit.ll b/test/Transforms/GVN/2010-05-08-OneBit.ll
new file mode 100644
index 0000000..1809cf0
--- /dev/null
+++ b/test/Transforms/GVN/2010-05-08-OneBit.ll
@@ -0,0 +1,63 @@
+; RUN: opt < %s -gvn
+; PR7052
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @main(i32 %argc, i8** nocapture %argv) {
+entry:
+  %0 = getelementptr inbounds i8* undef, i64 5    ; <i8*> [#uses=1]
+  %1 = bitcast i8* %0 to i32*                     ; <i32*> [#uses=1]
+  store i32 undef, i32* %1, align 1
+  br i1 undef, label %k121.i.i, label %l117.i.i
+
+l117.i.i:                                         ; preds = %entry
+  invoke fastcc void @foo()
+          to label %.noexc5 unwind label %landing_pad
+
+.noexc5:                                          ; preds = %l117.i.i
+  unreachable
+
+k121.i.i:                                         ; preds = %entry
+  br i1 undef, label %l129.i.i, label %k133.i.i
+
+l129.i.i:                                         ; preds = %k121.i.i
+  invoke fastcc void @foo()
+          to label %.noexc7 unwind label %landing_pad
+
+.noexc7:                                          ; preds = %l129.i.i
+  unreachable
+
+k133.i.i:                                         ; preds = %k121.i.i
+  %2 = getelementptr i8* undef, i64 5             ; <i8*> [#uses=1]
+  %3 = bitcast i8* %2 to i1*                      ; <i1*> [#uses=1]
+  %4 = load i1* %3                                ; <i1> [#uses=1]
+  br i1 %4, label %k151.i.i, label %l147.i.i
+
+l147.i.i:                                         ; preds = %k133.i.i
+  invoke fastcc void @foo()
+          to label %.noexc10 unwind label %landing_pad
+
+.noexc10:                                         ; preds = %l147.i.i
+  unreachable
+
+k151.i.i:                                         ; preds = %k133.i.i
+  ret i32 0
+
+landing_pad:                                      ; preds = %l147.i.i, %l129.i.i, %l117.i.i
+  switch i32 undef, label %fin [
+    i32 1, label %catch1
+    i32 2, label %catch
+  ]
+
+fin:                                              ; preds = %landing_pad
+  unreachable
+
+catch:                                            ; preds = %landing_pad
+  ret i32 1
+
+catch1:                                           ; preds = %landing_pad
+  ret i32 2
+}
+
+declare fastcc void @foo()
diff --git a/test/Transforms/Inline/2010-05-12-ValueMap.ll b/test/Transforms/Inline/2010-05-12-ValueMap.ll
new file mode 100644
index 0000000..f9cc13f
--- /dev/null
+++ b/test/Transforms/Inline/2010-05-12-ValueMap.ll
@@ -0,0 +1,28 @@
+; RUN: opt %s -inline -mergefunc -disable-output
+
+; This tests for a bug where the inliner kept the functions in a ValueMap after
+; it had completed and a ModulePass started to run. LLVM would crash deleting
+; a function that was still a key in the ValueMap.
+
+define internal fastcc void @list_Cdr1918() nounwind inlinehint {
+  unreachable
+}
+
+define internal fastcc void @list_PairSecond1927() nounwind inlinehint {
+  call fastcc void @list_Cdr1918() nounwind inlinehint
+  unreachable
+}
+
+define internal fastcc void @list_Cdr3164() nounwind inlinehint {
+  unreachable
+}
+
+define internal fastcc void @list_Nconc3167() nounwind inlinehint {
+  call fastcc void @list_Cdr3164() nounwind inlinehint
+  unreachable
+}
+
+define void @term_Equal() nounwind {
+  call fastcc void @list_Cdr3164() nounwind inlinehint
+  unreachable
+}
diff --git a/test/Transforms/InstCombine/cast.ll b/test/Transforms/InstCombine/cast.ll
index 878da68..77fccdf 100644
--- a/test/Transforms/InstCombine/cast.ll
+++ b/test/Transforms/InstCombine/cast.ll
@@ -605,3 +605,36 @@ define i64 @test59(i8 %A, i8 %B) nounwind {
 ; CHECK-NOT: i32
 ; CHECK:   ret i64 %H
 }
+
+define <3 x i32> @test60(<4 x i32> %call4) nounwind {
+  %tmp11 = bitcast <4 x i32> %call4 to i128
+  %tmp9 = trunc i128 %tmp11 to i96
+  %tmp10 = bitcast i96 %tmp9 to <3 x i32>
+  ret <3 x i32> %tmp10
+  
+; CHECK: @test60
+; CHECK-NEXT: shufflevector
+; CHECK-NEXT: ret
+}
+
+define <4 x i32> @test61(<3 x i32> %call4) nounwind {
+  %tmp11 = bitcast <3 x i32> %call4 to i96
+  %tmp9 = zext i96 %tmp11 to i128
+  %tmp10 = bitcast i128 %tmp9 to <4 x i32>
+  ret <4 x i32> %tmp10
+; CHECK: @test61
+; CHECK-NEXT: shufflevector
+; CHECK-NEXT: ret
+}
+
+define <4 x i32> @test62(<3 x float> %call4) nounwind {
+  %tmp11 = bitcast <3 x float> %call4 to i96
+  %tmp9 = zext i96 %tmp11 to i128
+  %tmp10 = bitcast i128 %tmp9 to <4 x i32>
+  ret <4 x i32> %tmp10
+; CHECK: @test62
+; CHECK-NEXT: bitcast
+; CHECK-NEXT: shufflevector
+; CHECK-NEXT: ret
+}
+
diff --git a/test/Transforms/Mem2Reg/ConvertDebugInfo.ll b/test/Transforms/Mem2Reg/ConvertDebugInfo.ll
index a013a1d..37d27d4 100644
--- a/test/Transforms/Mem2Reg/ConvertDebugInfo.ll
+++ b/test/Transforms/Mem2Reg/ConvertDebugInfo.ll
@@ -1,32 +1,45 @@
 ; RUN: opt < %s -mem2reg -S | FileCheck %s
 
-target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
-
-define double @testfunc(i32 %i, double %j) {
-	%I = alloca i32		; <i32*> [#uses=4]
-  call void @llvm.dbg.declare(metadata !{i32* %I}, metadata !0)
-	%J = alloca double		; <double*> [#uses=2]
-  call void @llvm.dbg.declare(metadata !{double* %J}, metadata !1)
+define double @testfunc(i32 %i, double %j) nounwind ssp {
+entry:
+  %i_addr = alloca i32                            ; <i32*> [#uses=2]
+  %j_addr = alloca double                         ; <double*> [#uses=2]
+  %retval = alloca double                         ; <double*> [#uses=2]
+  %0 = alloca double                              ; <double*> [#uses=2]
+  %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
+  call void @llvm.dbg.declare(metadata !{i32* %i_addr}, metadata !0), !dbg !8
 ; CHECK: call void @llvm.dbg.value(metadata !{i32 %i}, i64 0, metadata !0)
-	store i32 %i, i32* %I
-; CHECK: call void @llvm.dbg.value(metadata !{double %j}, i64 0, metadata !1), !dbg !3
-	store double %j, double* %J, !dbg !3
-	%t1 = load i32* %I		; <i32> [#uses=1]
-	%t2 = add i32 %t1, 1		; <i32> [#uses=1]
-	store i32 %t2, i32* %I
-	%t3 = load i32* %I		; <i32> [#uses=1]
-	%t4 = sitofp i32 %t3 to double		; <double> [#uses=1]
-	%t5 = load double* %J		; <double> [#uses=1]
-	%t6 = fmul double %t4, %t5		; <double> [#uses=1]
-	ret double %t6
+; CHECK: call void @llvm.dbg.value(metadata !{double %j}, i64 0, metadata !8)
+  store i32 %i, i32* %i_addr
+  call void @llvm.dbg.declare(metadata !{double* %j_addr}, metadata !9), !dbg !8
+  store double %j, double* %j_addr
+  %1 = load i32* %i_addr, align 4, !dbg !10       ; <i32> [#uses=1]
+  %2 = add nsw i32 %1, 1, !dbg !10                ; <i32> [#uses=1]
+  %3 = sitofp i32 %2 to double, !dbg !10          ; <double> [#uses=1]
+  %4 = load double* %j_addr, align 8, !dbg !10    ; <double> [#uses=1]
+  %5 = fadd double %3, %4, !dbg !10               ; <double> [#uses=1]
+  store double %5, double* %0, align 8, !dbg !10
+  %6 = load double* %0, align 8, !dbg !10         ; <double> [#uses=1]
+  store double %6, double* %retval, align 8, !dbg !10
+  br label %return, !dbg !10
+
+return:                                           ; preds = %entry
+  %retval1 = load double* %retval, !dbg !10       ; <double> [#uses=1]
+  ret double %retval1, !dbg !10
 }
 
 declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 
-!bar = !{!0}
-!foo = !{!2}
+!0 = metadata !{i32 524545, metadata !1, metadata !"i", metadata !2, i32 2, metadata !7} ; [ DW_TAG_arg_variable ]
+!1 = metadata !{i32 524334, i32 0, metadata !2, metadata !"testfunc", metadata !"testfunc", metadata !"testfunc", metadata !2, i32 2, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false} ; [ DW_TAG_subprogram ]
+!2 = metadata !{i32 524329, metadata !"testfunc.c", metadata !"/tmp", metadata !3} ; [ DW_TAG_file_type ]
+!3 = metadata !{i32 524305, i32 0, i32 1, metadata !"testfunc.c", metadata !"/tmp", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!4 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!5 = metadata !{metadata !6, metadata !7, metadata !6}
+!6 = metadata !{i32 524324, metadata !2, metadata !"double", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
+!7 = metadata !{i32 524324, metadata !2, metadata !"int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!8 = metadata !{i32 2, i32 0, metadata !1, null}
+!9 = metadata !{i32 524545, metadata !1, metadata !"j", metadata !2, i32 2, metadata !6} ; [ DW_TAG_arg_variable ]
+!10 = metadata !{i32 3, i32 0, metadata !11, null}
+!11 = metadata !{i32 524299, metadata !1, i32 2, i32 0} ; [ DW_TAG_lexical_block ]
 
-!0 = metadata !{i32 459008, metadata !1, metadata !"foo", metadata !2, i32 5, metadata !"foo"} ; [ DW_TAG_auto_variable ]
-!1 = metadata !{i32 459008, metadata !1, metadata !"foo", metadata !0, i32 5, metadata !1} ; [ DW_TAG_auto_variable ]
-!2 = metadata !{i32 458804, i32 0, metadata !2, metadata !"foo", metadata !"bar", metadata !"bar", metadata !2, i32 3, metadata !0, i1 false, i1 true} ; [ DW_TAG_variable ]
-!3 = metadata !{i32 4, i32 0, metadata !0, null}
diff --git a/test/Transforms/SimplifyCFG/trapping-load-unreachable.ll b/test/Transforms/SimplifyCFG/trapping-load-unreachable.ll
index 0c9cc8b..6956faa 100644
--- a/test/Transforms/SimplifyCFG/trapping-load-unreachable.ll
+++ b/test/Transforms/SimplifyCFG/trapping-load-unreachable.ll
@@ -1,19 +1,33 @@
-; RUN: opt < %s -simplifycfg -S | grep {volatile load}
+; RUN: opt < %s -simplifycfg -S | FileCheck %s
 ; PR2967
 
 target datalayout =
 "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32"
 target triple = "i386-pc-linux-gnu"
 
-define void @foo(i32 %x) nounwind {
+define void @test1(i32 %x) nounwind {
 entry:
         %0 = icmp eq i32 %x, 0          ; <i1> [#uses=1]
         br i1 %0, label %bb, label %return
 
 bb:             ; preds = %entry
-        %1 = volatile load i32* null            ; <i32> [#uses=0]
+        %1 = volatile load i32* null
         unreachable
+        
         br label %return
 return:         ; preds = %entry
         ret void
+; CHECK: @test1
+; CHECK: volatile load
+}
+
+; rdar://7958343
+define void @test2() nounwind {
+entry:
+        store i32 4,i32* null
+        ret void
+        
+; CHECK: @test2
+; CHECK: call void @llvm.trap
+; CHECK: unreachable
 }
diff --git a/test/Transforms/SimplifyLibCalls/memcmp.ll b/test/Transforms/SimplifyLibCalls/memcmp.ll
index 640d232..ee99501 100644
--- a/test/Transforms/SimplifyLibCalls/memcmp.ll
+++ b/test/Transforms/SimplifyLibCalls/memcmp.ll
@@ -1,5 +1,5 @@
 ; Test that the memcmpOptimizer works correctly
-; RUN: opt < %s -simplify-libcalls -S | not grep {call.*memcmp}
+; RUN: opt < %s -simplify-libcalls -S | FileCheck %s
 
 @h = constant [2 x i8] c"h\00"		; <[2 x i8]*> [#uses=0]
 @hel = constant [4 x i8] c"hel\00"		; <[4 x i8]*> [#uses=0]
@@ -9,14 +9,26 @@ declare i32 @memcmp(i8*, i8*, i32)
 
 define void @test(i8* %P, i8* %Q, i32 %N, i32* %IP, i1* %BP) {
 	%A = call i32 @memcmp( i8* %P, i8* %P, i32 %N )		; <i32> [#uses=1]
+; CHECK-NOT: call {{.*}} memcmp
+; CHECK: volatile store
 	volatile store i32 %A, i32* %IP
 	%B = call i32 @memcmp( i8* %P, i8* %Q, i32 0 )		; <i32> [#uses=1]
+; CHECK-NOT: call {{.*}} memcmp
+; CHECK: volatile store
 	volatile store i32 %B, i32* %IP
 	%C = call i32 @memcmp( i8* %P, i8* %Q, i32 1 )		; <i32> [#uses=1]
+; CHECK: load
+; CHECK: zext
+; CHECK: load
+; CHECK: zext
+; CHECK: sub
+; CHECK: volatile store
 	volatile store i32 %C, i32* %IP
         %F = call i32 @memcmp(i8* getelementptr ([4 x i8]* @hel, i32 0, i32 0),
                               i8* getelementptr ([8 x i8]* @hello_u, i32 0, i32 0),
                               i32 3)
+; CHECK-NOT: call {{.*}} memcmp
+; CHECK: volatile store
         volatile store i32 %F, i32* %IP
 	ret void
 }
diff --git a/test/Transforms/Sink/basic.ll b/test/Transforms/Sink/basic.ll
new file mode 100644
index 0000000..beb9481
--- /dev/null
+++ b/test/Transforms/Sink/basic.ll
@@ -0,0 +1,22 @@
+; RUN: opt < %s -sink -S | FileCheck %s
+
+@A = external global i32
+@B = external global i32
+
+; Sink should sink the load past the store (which doesn't overlap) into
+; the block that uses it.
+
+;      CHECK: @foo
+;      CHECK: true:
+; CHECK-NEXT: %l = load i32* @A
+; CHECK-NEXT: ret i32 %l
+
+define i32 @foo(i1 %z) {
+  %l = load i32* @A
+  store i32 0, i32* @B
+  br i1 %z, label %true, label %false
+true:
+  ret i32 %l
+false:
+  ret i32 0
+}
diff --git a/test/Transforms/Sink/dg.exp b/test/Transforms/Sink/dg.exp
new file mode 100644
index 0000000..f200589
--- /dev/null
+++ b/test/Transforms/Sink/dg.exp
@@ -0,0 +1,3 @@
+load_lib llvm.exp
+
+RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/tools/bugpoint/ExecutionDriver.cpp b/tools/bugpoint/ExecutionDriver.cpp
index 9eb3314..57f12d5 100644
--- a/tools/bugpoint/ExecutionDriver.cpp
+++ b/tools/bugpoint/ExecutionDriver.cpp
@@ -118,6 +118,10 @@ namespace {
                cl::desc("<safe-tool arguments>..."),
                cl::ZeroOrMore, cl::PositionalEatsArgs);
 
+  cl::opt<std::string>
+  GCCBinary("gcc", cl::init("gcc"), 
+              cl::desc("The gcc binary to use. (default 'gcc')"));
+
   cl::list<std::string>
   GCCToolArgv("gcc-tool-args", cl::Positional,
               cl::desc("<gcc-tool arguments>..."),
@@ -143,8 +147,8 @@ bool BugDriver::initializeExecutionEnvironment() {
   case AutoPick:
     InterpreterSel = RunCBE;
     Interpreter =
-      AbstractInterpreter::createCBE(getToolName(), Message, &ToolArgv,
-                                     &GCCToolArgv);
+      AbstractInterpreter::createCBE(getToolName(), Message, GCCBinary,
+                                     &ToolArgv, &GCCToolArgv);
     if (!Interpreter) {
       InterpreterSel = RunJIT;
       Interpreter = AbstractInterpreter::createJIT(getToolName(), Message,
@@ -153,7 +157,8 @@ bool BugDriver::initializeExecutionEnvironment() {
     if (!Interpreter) {
       InterpreterSel = RunLLC;
       Interpreter = AbstractInterpreter::createLLC(getToolName(), Message,
-                                                   &ToolArgv, &GCCToolArgv);
+                                                   GCCBinary, &ToolArgv, 
+                                                   &GCCToolArgv);
     }
     if (!Interpreter) {
       InterpreterSel = RunLLI;
@@ -173,7 +178,8 @@ bool BugDriver::initializeExecutionEnvironment() {
   case RunLLCIA:
   case LLC_Safe:
     Interpreter = AbstractInterpreter::createLLC(getToolName(), Message,
-                                                 &ToolArgv, &GCCToolArgv,
+                                                 GCCBinary, &ToolArgv, 
+                                                 &GCCToolArgv,
                                                  InterpreterSel == RunLLCIA);
     break;
   case RunJIT:
@@ -183,7 +189,8 @@ bool BugDriver::initializeExecutionEnvironment() {
   case RunCBE:
   case CBE_bug:
     Interpreter = AbstractInterpreter::createCBE(getToolName(), Message,
-                                                 &ToolArgv, &GCCToolArgv);
+                                                 GCCBinary, &ToolArgv, 
+                                                 &GCCToolArgv);
     break;
   case Custom:
     Interpreter = AbstractInterpreter::createCustom(Message, CustomExecCommand);
@@ -209,6 +216,7 @@ bool BugDriver::initializeExecutionEnvironment() {
       SafeInterpreterSel = RunLLC;
       SafeToolArgs.push_back("--relocation-model=pic");
       SafeInterpreter = AbstractInterpreter::createLLC(Path.c_str(), Message,
+                                                       GCCBinary, 
                                                        &SafeToolArgs,
                                                        &GCCToolArgv);
     }
@@ -219,6 +227,7 @@ bool BugDriver::initializeExecutionEnvironment() {
       SafeInterpreterSel = RunLLC;
       SafeToolArgs.push_back("--relocation-model=pic");
       SafeInterpreter = AbstractInterpreter::createLLC(Path.c_str(), Message,
+                                                       GCCBinary, 
                                                        &SafeToolArgs,
                                                        &GCCToolArgv);
     }
@@ -230,6 +239,7 @@ bool BugDriver::initializeExecutionEnvironment() {
         InterpreterSel != RunCBE) {
       SafeInterpreterSel = RunCBE;
       SafeInterpreter = AbstractInterpreter::createCBE(Path.c_str(), Message,
+                                                       GCCBinary,
                                                        &SafeToolArgs,
                                                        &GCCToolArgv);
     }
@@ -239,6 +249,7 @@ bool BugDriver::initializeExecutionEnvironment() {
       SafeInterpreterSel = RunLLC;
       SafeToolArgs.push_back("--relocation-model=pic");
       SafeInterpreter = AbstractInterpreter::createLLC(Path.c_str(), Message,
+                                                       GCCBinary, 
                                                        &SafeToolArgs,
                                                        &GCCToolArgv);
     }
@@ -251,13 +262,13 @@ bool BugDriver::initializeExecutionEnvironment() {
   case RunLLCIA:
     SafeToolArgs.push_back("--relocation-model=pic");
     SafeInterpreter = AbstractInterpreter::createLLC(Path.c_str(), Message,
-                                                     &SafeToolArgs,
+                                                     GCCBinary, &SafeToolArgs,
                                                      &GCCToolArgv,
                                                 SafeInterpreterSel == RunLLCIA);
     break;
   case RunCBE:
     SafeInterpreter = AbstractInterpreter::createCBE(Path.c_str(), Message,
-                                                     &SafeToolArgs,
+                                                     GCCBinary, &SafeToolArgs,
                                                      &GCCToolArgv);
     break;
   case Custom:
@@ -271,7 +282,7 @@ bool BugDriver::initializeExecutionEnvironment() {
   }
   if (!SafeInterpreter) { outs() << Message << "\nExiting.\n"; exit(1); }
   
-  gcc = GCC::create(Message, &GCCToolArgv);
+  gcc = GCC::create(Message, GCCBinary, &GCCToolArgv);
   if (!gcc) { outs() << Message << "\nExiting.\n"; exit(1); }
 
   // If there was an error creating the selected interpreter, quit with error.
@@ -301,7 +312,7 @@ void BugDriver::compileProgram(Module *M, std::string *Error) {
   FileRemover BitcodeFileRemover(BitcodeFile, !SaveTemps);
 
   // Actually compile the program!
-  Interpreter->compileProgram(BitcodeFile.str(), Error);
+  Interpreter->compileProgram(BitcodeFile.str(), Error, Timeout, MemoryLimit);
 }
 
 
diff --git a/tools/bugpoint/Miscompilation.cpp b/tools/bugpoint/Miscompilation.cpp
index 45bb745..71484a2 100644
--- a/tools/bugpoint/Miscompilation.cpp
+++ b/tools/bugpoint/Miscompilation.cpp
@@ -126,7 +126,8 @@ ReduceMiscompilingPasses::doTest(std::vector<const PassInfo*> &Prefix,
   // Ok, so now we know that the prefix passes work, try running the suffix
   // passes on the result of the prefix passes.
   //
-  Module *PrefixOutput = ParseInputFile(BitcodeResult, BD.getContext());
+  OwningPtr<Module> PrefixOutput(ParseInputFile(BitcodeResult,
+                                                BD.getContext()));
   if (PrefixOutput == 0) {
     errs() << BD.getToolName() << ": Error reading bitcode file '"
            << BitcodeResult << "'!\n";
@@ -142,7 +143,7 @@ ReduceMiscompilingPasses::doTest(std::vector<const PassInfo*> &Prefix,
             << "' passes compile correctly after the '"
             << getPassesString(Prefix) << "' passes: ";
 
-  Module *OriginalInput = BD.swapProgramIn(PrefixOutput);
+  OwningPtr<Module> OriginalInput(BD.swapProgramIn(PrefixOutput.take()));
   if (BD.runPasses(Suffix, BitcodeResult, false/*delete*/, true/*quiet*/)) {
     errs() << " Error running this sequence of passes"
            << " on the input program!\n";
@@ -157,13 +158,13 @@ ReduceMiscompilingPasses::doTest(std::vector<const PassInfo*> &Prefix,
     return InternalError;
   if (Diff) {
     outs() << " nope.\n";
-    delete OriginalInput;     // We pruned down the original input...
     return KeepSuffix;
   }
 
   // Otherwise, we must not be running the bad pass anymore.
   outs() << " yup.\n";      // No miscompilation!
-  delete BD.swapProgramIn(OriginalInput); // Restore orig program & free test
+  // Restore orig program & free test.
+  delete BD.swapProgramIn(OriginalInput.take());
   return NoFailure;
 }
 
@@ -222,15 +223,14 @@ static bool TestMergedProgram(BugDriver &BD, Module *M1, Module *M2,
   }
   delete M2;   // We are done with this module.
 
-  Module *OldProgram = BD.swapProgramIn(M1);
+  OwningPtr<Module> OldProgram(BD.swapProgramIn(M1));
 
   // Execute the program.  If it does not match the expected output, we must
   // return true.
   bool Broken = BD.diffProgram("", "", false, &Error);
   if (!Error.empty()) {
     // Delete the linked module & restore the original
-    BD.swapProgramIn(OldProgram);
-    delete M1;
+    delete BD.swapProgramIn(OldProgram.take());
   }
   return Broken;
 }
diff --git a/tools/bugpoint/ToolRunner.cpp b/tools/bugpoint/ToolRunner.cpp
index 7ade778..3149a7a 100644
--- a/tools/bugpoint/ToolRunner.cpp
+++ b/tools/bugpoint/ToolRunner.cpp
@@ -133,7 +133,9 @@ static int RunProgramRemotelyWithTimeout(const sys::Path &RemoteClientPath,
   return ReturnCode;
 }
 
-static std::string ProcessFailure(sys::Path ProgPath, const char** Args) {
+static std::string ProcessFailure(sys::Path ProgPath, const char** Args,
+                                  unsigned Timeout = 0,
+                                  unsigned MemoryLimit = 0) {
   std::ostringstream OS;
   OS << "\nError running tool:\n ";
   for (const char **Arg = Args; *Arg; ++Arg)
@@ -148,7 +150,8 @@ static std::string ProcessFailure(sys::Path ProgPath, const char** Args) {
     exit(1);
   }
   RunProgramWithTimeout(ProgPath, Args, sys::Path(""), ErrorFilename,
-                        ErrorFilename); // FIXME: check return code ?
+                        ErrorFilename, Timeout, MemoryLimit);
+  // FIXME: check return code ?
 
   // Print out the error messages generated by GCC if possible...
   std::ifstream ErrorFile(ErrorFilename.c_str());
@@ -353,7 +356,8 @@ AbstractInterpreter *AbstractInterpreter::createCustom(
 // LLC Implementation of AbstractIntepreter interface
 //
 GCC::FileType LLC::OutputCode(const std::string &Bitcode, 
-                              sys::Path &OutputAsmFile, std::string &Error) {
+                              sys::Path &OutputAsmFile, std::string &Error,
+                              unsigned Timeout, unsigned MemoryLimit) {
   const char *Suffix = (UseIntegratedAssembler ? ".llc.o" : ".llc.s");
   sys::Path uniqueFile(Bitcode + Suffix);
   std::string ErrMsg;
@@ -386,14 +390,17 @@ GCC::FileType LLC::OutputCode(const std::string &Bitcode,
         errs() << "\n";
         );
   if (RunProgramWithTimeout(sys::Path(LLCPath), &LLCArgs[0],
-                            sys::Path(), sys::Path(), sys::Path()))
-    Error = ProcessFailure(sys::Path(LLCPath), &LLCArgs[0]);
+                            sys::Path(), sys::Path(), sys::Path(),
+                            Timeout, MemoryLimit))
+    Error = ProcessFailure(sys::Path(LLCPath), &LLCArgs[0],
+                           Timeout, MemoryLimit);
   return UseIntegratedAssembler ? GCC::ObjectFile : GCC::AsmFile;  
 }
 
-void LLC::compileProgram(const std::string &Bitcode, std::string *Error) {
+void LLC::compileProgram(const std::string &Bitcode, std::string *Error,
+                         unsigned Timeout, unsigned MemoryLimit) {
   sys::Path OutputAsmFile;
-  OutputCode(Bitcode, OutputAsmFile, *Error);
+  OutputCode(Bitcode, OutputAsmFile, *Error, Timeout, MemoryLimit);
   OutputAsmFile.eraseFromDisk();
 }
 
@@ -408,12 +415,12 @@ int LLC::ExecuteProgram(const std::string &Bitcode,
                         unsigned MemoryLimit) {
 
   sys::Path OutputAsmFile;
-  GCC::FileType FileKind = OutputCode(Bitcode, OutputAsmFile, *Error);
+  GCC::FileType FileKind = OutputCode(Bitcode, OutputAsmFile, *Error, Timeout,
+                                      MemoryLimit);
   FileRemover OutFileRemover(OutputAsmFile, !SaveTemps);
 
   std::vector<std::string> GCCArgs(ArgsForGCC);
   GCCArgs.insert(GCCArgs.end(), SharedLibs.begin(), SharedLibs.end());
-  GCCArgs.insert(GCCArgs.end(), gccArgs.begin(), gccArgs.end());
 
   // Assuming LLC worked, compile the result with GCC and run it.
   return gcc->ExecuteProgram(OutputAsmFile.str(), Args, FileKind,
@@ -425,6 +432,7 @@ int LLC::ExecuteProgram(const std::string &Bitcode,
 ///
 LLC *AbstractInterpreter::createLLC(const char *Argv0,
                                     std::string &Message,
+                                    const std::string &GCCBinary,
                                     const std::vector<std::string> *Args,
                                     const std::vector<std::string> *GCCArgs,
                                     bool UseIntegratedAssembler) {
@@ -436,12 +444,12 @@ LLC *AbstractInterpreter::createLLC(const char *Argv0,
   }
 
   Message = "Found llc: " + LLCPath + "\n";
-  GCC *gcc = GCC::create(Message, GCCArgs);
+  GCC *gcc = GCC::create(Message, GCCBinary, GCCArgs);
   if (!gcc) {
     errs() << Message << "\n";
     exit(1);
   }
-  return new LLC(LLCPath, gcc, Args, GCCArgs, UseIntegratedAssembler);
+  return new LLC(LLCPath, gcc, Args, UseIntegratedAssembler);
 }
 
 //===---------------------------------------------------------------------===//
@@ -528,7 +536,8 @@ AbstractInterpreter *AbstractInterpreter::createJIT(const char *Argv0,
 }
 
 GCC::FileType CBE::OutputCode(const std::string &Bitcode,
-                              sys::Path &OutputCFile, std::string &Error) {
+                              sys::Path &OutputCFile, std::string &Error,
+                              unsigned Timeout, unsigned MemoryLimit) {
   sys::Path uniqueFile(Bitcode+".cbe.c");
   std::string ErrMsg;
   if (uniqueFile.makeUnique(true, &ErrMsg)) {
@@ -556,14 +565,15 @@ GCC::FileType CBE::OutputCode(const std::string &Bitcode,
         errs() << "\n";
         );
   if (RunProgramWithTimeout(LLCPath, &LLCArgs[0], sys::Path(), sys::Path(),
-                            sys::Path()))
-    Error = ProcessFailure(LLCPath, &LLCArgs[0]);
+                            sys::Path(), Timeout, MemoryLimit))
+    Error = ProcessFailure(LLCPath, &LLCArgs[0], Timeout, MemoryLimit);
   return GCC::CFile;
 }
 
-void CBE::compileProgram(const std::string &Bitcode, std::string *Error) {
+void CBE::compileProgram(const std::string &Bitcode, std::string *Error,
+                         unsigned Timeout, unsigned MemoryLimit) {
   sys::Path OutputCFile;
-  OutputCode(Bitcode, OutputCFile, *Error);
+  OutputCode(Bitcode, OutputCFile, *Error, Timeout, MemoryLimit);
   OutputCFile.eraseFromDisk();
 }
 
@@ -577,7 +587,7 @@ int CBE::ExecuteProgram(const std::string &Bitcode,
                         unsigned Timeout,
                         unsigned MemoryLimit) {
   sys::Path OutputCFile;
-  OutputCode(Bitcode, OutputCFile, *Error);
+  OutputCode(Bitcode, OutputCFile, *Error, Timeout, MemoryLimit);
 
   FileRemover CFileRemove(OutputCFile, !SaveTemps);
 
@@ -593,6 +603,7 @@ int CBE::ExecuteProgram(const std::string &Bitcode,
 ///
 CBE *AbstractInterpreter::createCBE(const char *Argv0,
                                     std::string &Message,
+                                    const std::string &GCCBinary, 
                                     const std::vector<std::string> *Args,
                                     const std::vector<std::string> *GCCArgs) {
   sys::Path LLCPath =
@@ -604,7 +615,7 @@ CBE *AbstractInterpreter::createCBE(const char *Argv0,
   }
 
   Message = "Found llc: " + LLCPath.str() + "\n";
-  GCC *gcc = GCC::create(Message, GCCArgs);
+  GCC *gcc = GCC::create(Message, GCCBinary, GCCArgs);
   if (!gcc) {
     errs() << Message << "\n";
     exit(1);
@@ -619,10 +630,9 @@ CBE *AbstractInterpreter::createCBE(const char *Argv0,
 static bool IsARMArchitecture(std::vector<std::string> Args) {
   for (std::vector<std::string>::const_iterator
          I = Args.begin(), E = Args.end(); I != E; ++I) {
-    StringRef S(*I);
-    if (!S.equals_lower("-arch")) {
+    if (StringRef(*I).equals_lower("-arch")) {
       ++I;
-      if (I != E && !S.substr(0, strlen("arm")).equals_lower("arm"))
+      if (I != E && StringRef(*I).substr(0, strlen("arm")).equals_lower("arm"))
         return true;
     }
   }
@@ -852,10 +862,11 @@ int GCC::MakeSharedObject(const std::string &InputFile, FileType fileType,
 /// create - Try to find the `gcc' executable
 ///
 GCC *GCC::create(std::string &Message,
+                 const std::string &GCCBinary,
                  const std::vector<std::string> *Args) {
-  sys::Path GCCPath = sys::Program::FindProgramByName("gcc");
+  sys::Path GCCPath = sys::Program::FindProgramByName(GCCBinary);
   if (GCCPath.isEmpty()) {
-    Message = "Cannot find `gcc' in executable directory or PATH!\n";
+    Message = "Cannot find `"+ GCCBinary +"' in executable directory or PATH!\n";
     return 0;
   }
 
diff --git a/tools/bugpoint/ToolRunner.h b/tools/bugpoint/ToolRunner.h
index cba10f2..d966fc0 100644
--- a/tools/bugpoint/ToolRunner.h
+++ b/tools/bugpoint/ToolRunner.h
@@ -49,6 +49,7 @@ public:
   enum FileType { AsmFile, ObjectFile, CFile };
 
   static GCC *create(std::string &Message,
+                     const std::string &GCCBinary,
                      const std::vector<std::string> *Args);
 
   /// ExecuteProgram - Execute the program specified by "ProgramFile" (which is
@@ -87,9 +88,11 @@ public:
 class AbstractInterpreter {
 public:
   static CBE *createCBE(const char *Argv0, std::string &Message,
+                        const std::string              &GCCBinary,
                         const std::vector<std::string> *Args = 0,
                         const std::vector<std::string> *GCCArgs = 0);
   static LLC *createLLC(const char *Argv0, std::string &Message,
+                        const std::string              &GCCBinary,
                         const std::vector<std::string> *Args = 0,
                         const std::vector<std::string> *GCCArgs = 0,
                         bool UseIntegratedAssembler = false);
@@ -109,14 +112,17 @@ public:
   /// compileProgram - Compile the specified program from bitcode to executable
   /// code.  This does not produce any output, it is only used when debugging
   /// the code generator.  It returns false if the code generator fails.
-  virtual void compileProgram(const std::string &Bitcode, std::string *Error) {}
+  virtual void compileProgram(const std::string &Bitcode, std::string *Error,
+                              unsigned Timeout = 0, unsigned MemoryLimit = 0) {}
 
   /// OutputCode - Compile the specified program from bitcode to code
   /// understood by the GCC driver (either C or asm).  If the code generator
   /// fails, it sets Error, otherwise, this function returns the type of code
   /// emitted.
   virtual GCC::FileType OutputCode(const std::string &Bitcode,
-                                   sys::Path &OutFile, std::string &Error) {
+                                   sys::Path &OutFile, std::string &Error,
+                                   unsigned Timeout = 0,
+                                   unsigned MemoryLimit = 0) {
     Error = "OutputCode not supported by this AbstractInterpreter!";
     return GCC::AsmFile;
   }
@@ -158,7 +164,8 @@ public:
   /// compileProgram - Compile the specified program from bitcode to executable
   /// code.  This does not produce any output, it is only used when debugging
   /// the code generator.  Returns false if the code generator fails.
-  virtual void compileProgram(const std::string &Bitcode, std::string *Error);
+  virtual void compileProgram(const std::string &Bitcode, std::string *Error,
+                              unsigned Timeout = 0, unsigned MemoryLimit = 0);
 
   virtual int ExecuteProgram(const std::string &Bitcode,
                              const std::vector<std::string> &Args,
@@ -177,7 +184,9 @@ public:
   /// fails, it sets Error, otherwise, this function returns the type of code
   /// emitted.
   virtual GCC::FileType OutputCode(const std::string &Bitcode,
-                                   sys::Path &OutFile, std::string &Error);
+                                   sys::Path &OutFile, std::string &Error,
+                                   unsigned Timeout = 0,
+                                   unsigned MemoryLimit = 0);
 };
 
 
@@ -187,26 +196,24 @@ public:
 class LLC : public AbstractInterpreter {
   std::string LLCPath;               // The path to the LLC executable.
   std::vector<std::string> ToolArgs; // Extra args to pass to LLC.
-  std::vector<std::string> gccArgs;  // Extra args to pass to GCC.
   GCC *gcc;
   bool UseIntegratedAssembler;
 public:
   LLC(const std::string &llcPath, GCC *Gcc,
       const std::vector<std::string> *Args,
-      const std::vector<std::string> *GCCArgs,
       bool useIntegratedAssembler)
     : LLCPath(llcPath), gcc(Gcc),
       UseIntegratedAssembler(useIntegratedAssembler) {
     ToolArgs.clear();
     if (Args) ToolArgs = *Args;
-    if (GCCArgs) gccArgs = *GCCArgs;
   }
   ~LLC() { delete gcc; }
 
   /// compileProgram - Compile the specified program from bitcode to executable
   /// code.  This does not produce any output, it is only used when debugging
   /// the code generator.  Returns false if the code generator fails.
-  virtual void compileProgram(const std::string &Bitcode, std::string *Error);
+  virtual void compileProgram(const std::string &Bitcode, std::string *Error,
+                              unsigned Timeout = 0, unsigned MemoryLimit = 0);
 
   virtual int ExecuteProgram(const std::string &Bitcode,
                              const std::vector<std::string> &Args,
@@ -225,7 +232,9 @@ public:
   /// fails, it sets Error, otherwise, this function returns the type of code
   /// emitted.
   virtual GCC::FileType OutputCode(const std::string &Bitcode,
-                                   sys::Path &OutFile, std::string &Error);
+                                   sys::Path &OutFile, std::string &Error,
+                                   unsigned Timeout = 0,
+                                   unsigned MemoryLimit = 0);
 };
 
 } // End llvm namespace
diff --git a/tools/edis/EDInst.cpp b/tools/edis/EDInst.cpp
index af3a54a..c009f0f 100644
--- a/tools/edis/EDInst.cpp
+++ b/tools/edis/EDInst.cpp
@@ -81,7 +81,9 @@ unsigned EDInst::instID() {
 
 bool EDInst::isBranch() {
   if (ThisInstInfo)
-    return ThisInstInfo->instructionType == kInstructionTypeBranch;
+    return 
+      ThisInstInfo->instructionType == kInstructionTypeBranch ||
+      ThisInstInfo->instructionType == kInstructionTypeCall;
   else
     return false;
 }
diff --git a/tools/gold/Makefile b/tools/gold/Makefile
index 77eacb7..1627346 100644
--- a/tools/gold/Makefile
+++ b/tools/gold/Makefile
@@ -8,7 +8,7 @@
 ##===----------------------------------------------------------------------===##
 
 LEVEL = ../..
-LIBRARYNAME = libLLVMgold
+LIBRARYNAME = LLVMgold
 
 EXPORTED_SYMBOL_FILE = $(PROJ_SRC_DIR)/gold.exports
 
diff --git a/tools/gold/gold-plugin.cpp b/tools/gold/gold-plugin.cpp
index 16e645f..2e5c179 100644
--- a/tools/gold/gold-plugin.cpp
+++ b/tools/gold/gold-plugin.cpp
@@ -59,6 +59,7 @@ namespace {
 
 namespace options {
   static bool generate_api_file = false;
+  static std::string bc_path;
   static const char *as_path = NULL;
   // Additional options to pass into the code generator.
   // Note: This array will contain all plugin options which are not claimed 
@@ -81,6 +82,14 @@ namespace options {
       } else {
         as_path = strdup(opt + 3);
       }
+    } else if(llvm::StringRef(opt).startswith("also-emit-llvm=")) {
+      const char *path = opt + strlen("also-emit-llvm=");
+      if (bc_path != "") {
+        (*message)(LDPL_WARNING, "Path to the output IL file specified twice. "
+                   "Discarding %s", opt);
+      } else {
+        bc_path = path;
+      }
     } else {
       // Save this option to pass to the code generator.
       extra.push_back(std::string(opt));
@@ -374,6 +383,11 @@ static ld_plugin_status all_symbols_read_hook(void) {
     }
   }
 
+  if (options::bc_path != "") {
+    bool err = lto_codegen_write_merged_modules(cg, options::bc_path.c_str());
+    if (err)
+      (*message)(LDPL_FATAL, "Failed to write the output file.");
+  }
   size_t bufsize = 0;
   const char *buffer = static_cast<const char *>(lto_codegen_compile(cg,
                                                                      &bufsize));
diff --git a/tools/llc/llc.cpp b/tools/llc/llc.cpp
index 378cc63..f3eed56 100644
--- a/tools/llc/llc.cpp
+++ b/tools/llc/llc.cpp
@@ -119,7 +119,8 @@ GetFileNameRoot(const std::string &InputFilename) {
   return outputFilename;
 }
 
-static formatted_raw_ostream *GetOutputStream(const char *TargetName, 
+static formatted_raw_ostream *GetOutputStream(const char *TargetName,
+                                              Triple::OSType OS,
                                               const char *ProgName) {
   if (OutputFilename != "") {
     if (OutputFilename == "-")
@@ -166,7 +167,10 @@ static formatted_raw_ostream *GetOutputStream(const char *TargetName,
       OutputFilename += ".s";
     break;
   case TargetMachine::CGFT_ObjectFile:
-    OutputFilename += ".o";
+    if (OS == Triple::Win32)
+      OutputFilename += ".obj";
+    else
+      OutputFilename += ".o";
     Binary = true;
     break;
   case TargetMachine::CGFT_Null:
@@ -284,7 +288,8 @@ int main(int argc, char **argv) {
   TargetMachine &Target = *target.get();
 
   // Figure out where we are going to send the output...
-  formatted_raw_ostream *Out = GetOutputStream(TheTarget->getName(), argv[0]);
+  formatted_raw_ostream *Out = GetOutputStream(TheTarget->getName(),
+                                               TheTriple.getOS(), argv[0]);
   if (Out == 0) return 1;
 
   CodeGenOpt::Level OLvl = CodeGenOpt::Default;
@@ -307,75 +312,34 @@ int main(int argc, char **argv) {
   bool DisableVerify = true;
 #endif
 
-  // If this target requires addPassesToEmitWholeFile, do it now.  This is
-  // used by strange things like the C backend.
-  if (Target.WantsWholeFile()) {
-    PassManager PM;
-
-    // Add the target data from the target machine, if it exists, or the module.
-    if (const TargetData *TD = Target.getTargetData())
-      PM.add(new TargetData(*TD));
-    else
-      PM.add(new TargetData(&mod));
-
-    if (!NoVerify)
-      PM.add(createVerifierPass());
-
-    // Ask the target to add backend passes as necessary.
-    if (Target.addPassesToEmitWholeFile(PM, *Out, FileType, OLvl,
-                                        DisableVerify)) {
-      errs() << argv[0] << ": target does not support generation of this"
-             << " file type!\n";
-      if (Out != &fouts()) delete Out;
-      // And the Out file is empty and useless, so remove it now.
-      sys::Path(OutputFilename).eraseFromDisk();
-      return 1;
-    }
-    PM.run(mod);
-  } else {
-    // Build up all of the passes that we want to do to the module.
-    FunctionPassManager Passes(M.get());
-
-    // Add the target data from the target machine, if it exists, or the module.
-    if (const TargetData *TD = Target.getTargetData())
-      Passes.add(new TargetData(*TD));
-    else
-      Passes.add(new TargetData(&mod));
-
-#ifndef NDEBUG
-    if (!NoVerify)
-      Passes.add(createVerifierPass());
-#endif
-
-    // Override default to generate verbose assembly.
-    Target.setAsmVerbosityDefault(true);
-
-    if (Target.addPassesToEmitFile(Passes, *Out, FileType, OLvl,
-                                   DisableVerify)) {
-      errs() << argv[0] << ": target does not support generation of this"
-             << " file type!\n";
-      if (Out != &fouts()) delete Out;
-      // And the Out file is empty and useless, so remove it now.
-      sys::Path(OutputFilename).eraseFromDisk();
-      return 1;
-    }
-
-    Passes.doInitialization();
-
-    // Run our queue of passes all at once now, efficiently.
-    // TODO: this could lazily stream functions out of the module.
-    for (Module::iterator I = mod.begin(), E = mod.end(); I != E; ++I)
-      if (!I->isDeclaration()) {
-        if (DisableRedZone)
-          I->addFnAttr(Attribute::NoRedZone);
-        if (NoImplicitFloats)
-          I->addFnAttr(Attribute::NoImplicitFloat);
-        Passes.run(*I);
-      }
-
-    Passes.doFinalization();
+  // Build up all of the passes that we want to do to the module.
+  PassManager PM;
+
+  // Add the target data from the target machine, if it exists, or the module.
+  if (const TargetData *TD = Target.getTargetData())
+    PM.add(new TargetData(*TD));
+  else
+    PM.add(new TargetData(&mod));
+
+  if (!NoVerify)
+    PM.add(createVerifierPass());
+
+  // Override default to generate verbose assembly.
+  Target.setAsmVerbosityDefault(true);
+
+  // Ask the target to add backend passes as necessary.
+  if (Target.addPassesToEmitFile(PM, *Out, FileType, OLvl,
+                                 DisableVerify)) {
+    errs() << argv[0] << ": target does not support generation of this"
+           << " file type!\n";
+    if (Out != &fouts()) delete Out;
+    // And the Out file is empty and useless, so remove it now.
+    sys::Path(OutputFilename).eraseFromDisk();
+    return 1;
   }
 
+  PM.run(mod);
+
   // Delete the ostream if it's not a stdout stream
   if (Out != &fouts()) delete Out;
 
diff --git a/tools/llvm-ld/llvm-ld.cpp b/tools/llvm-ld/llvm-ld.cpp
index ce52b59..ad6956c 100644
--- a/tools/llvm-ld/llvm-ld.cpp
+++ b/tools/llvm-ld/llvm-ld.cpp
@@ -266,7 +266,6 @@ static int GenerateAssembly(const std::string &OutputFilename,
   // We will use GCC to assemble the program so set the assembly syntax to AT&T,
   // regardless of what the target in the bitcode file is.
   args.push_back("-x86-asm-syntax=att");
-  args.push_back("-f");
   args.push_back("-o");
   args.push_back(OutputFilename.c_str());
   args.push_back(InputFilename.c_str());
@@ -289,7 +288,6 @@ static int GenerateCFile(const std::string &OutputFile,
   std::vector<const char*> args;
   args.push_back(llc.c_str());
   args.push_back("-march=c");
-  args.push_back("-f");
   args.push_back("-o");
   args.push_back(OutputFile.c_str());
   args.push_back(InputFile.c_str());
diff --git a/tools/llvm-mc/Disassembler.cpp b/tools/llvm-mc/Disassembler.cpp
index 9234a14..37b2cb8 100644
--- a/tools/llvm-mc/Disassembler.cpp
+++ b/tools/llvm-mc/Disassembler.cpp
@@ -314,6 +314,11 @@ int Disassembler::disassembleEnhanced(const std::string &TS,
   }
   
   outs() << " ";
+    
+  if (EDInstIsBranch(inst))
+    outs() << "<br> ";
+  if (EDInstIsMove(inst))
+    outs() << "<mov> ";
   
   int numOperands = EDNumOperands(inst);
   
@@ -342,6 +347,8 @@ int Disassembler::disassembleEnhanced(const std::string &TS,
                       operand, 
                       verboseEvaluator, 
                       &disassembler);
+      
+    outs() << "=" << evaluatedResult;
     
     outs() << " ";
   }
diff --git a/tools/llvm-mc/llvm-mc.cpp b/tools/llvm-mc/llvm-mc.cpp
index 47f67c5..a114ab0 100644
--- a/tools/llvm-mc/llvm-mc.cpp
+++ b/tools/llvm-mc/llvm-mc.cpp
@@ -58,6 +58,9 @@ OutputAsmVariant("output-asm-variant",
 static cl::opt<bool>
 RelaxAll("mc-relax-all", cl::desc("Relax all fixups"));
 
+static cl::opt<bool>
+EnableLogging("enable-api-logging", cl::desc("Enable MC API logging"));
+
 enum OutputFileType {
   OFT_Null,
   OFT_AssemblyFile,
@@ -75,9 +78,6 @@ FileType("filetype", cl::init(OFT_AssemblyFile),
                   "Emit a native object ('.o') file"),
        clEnumValEnd));
 
-static cl::opt<bool>
-Force("f", cl::desc("Enable binary output on terminals"));
-
 static cl::list<std::string>
 IncludeDirs("I", cl::desc("Directory of include files"),
             cl::value_desc("directory"), cl::Prefix);
@@ -304,7 +304,12 @@ static int AssembleInput(const char *ProgName) {
     assert(FileType == OFT_ObjectFile && "Invalid file type!");
     CE.reset(TheTarget->createCodeEmitter(*TM, Ctx));
     TAB.reset(TheTarget->createAsmBackend(TripleName));
-    Str.reset(createMachOStreamer(Ctx, *TAB, *Out, CE.get(), RelaxAll));
+    Str.reset(TheTarget->createObjectStreamer(TripleName, Ctx, *TAB,
+                                              *Out, CE.get(), RelaxAll));
+  }
+
+  if (EnableLogging) {
+    Str.reset(createLoggingStreamer(Str.take(), errs()));
   }
 
   AsmParser Parser(SrcMgr, Ctx, *Str.get(), *MAI);
diff --git a/tools/lto/LTOCodeGenerator.cpp b/tools/lto/LTOCodeGenerator.cpp
index 1010592..59e8405 100644
--- a/tools/lto/LTOCodeGenerator.cpp
+++ b/tools/lto/LTOCodeGenerator.cpp
@@ -300,8 +300,9 @@ bool LTOCodeGenerator::determineTarget(std::string& errMsg)
         }
 
         // construct LTModule, hand over ownership of module and target
-        const std::string FeatureStr =
-           SubtargetFeatures::getDefaultSubtargetFeatures(llvm::Triple(Triple));
+        SubtargetFeatures Features;
+        Features.getDefaultSubtargetFeatures("" /* cpu */, llvm::Triple(Triple));
+        std::string FeatureStr = Features.getString();
         _target = march->createTargetMachine(Triple, FeatureStr);
     }
     return false;
diff --git a/tools/lto/LTOModule.cpp b/tools/lto/LTOModule.cpp
index b269e78..0870205 100644
--- a/tools/lto/LTOModule.cpp
+++ b/tools/lto/LTOModule.cpp
@@ -140,8 +140,9 @@ LTOModule* LTOModule::makeLTOModule(MemoryBuffer* buffer,
         return NULL;
 
     // construct LTModule, hand over ownership of module and target
-    const std::string FeatureStr = 
-        SubtargetFeatures::getDefaultSubtargetFeatures(llvm::Triple(Triple));
+    SubtargetFeatures Features;
+    Features.getDefaultSubtargetFeatures("" /* cpu */, llvm::Triple(Triple));
+    std::string FeatureStr = Features.getString();
     TargetMachine* target = march->createTargetMachine(Triple, FeatureStr);
     return new LTOModule(m.take(), target);
 }
diff --git a/tools/opt/opt.cpp b/tools/opt/opt.cpp
index ff19942..51b920f 100644
--- a/tools/opt/opt.cpp
+++ b/tools/opt/opt.cpp
@@ -268,7 +268,7 @@ struct BasicBlockPassPrinter : public BasicBlockPass {
 };
 
 char BasicBlockPassPrinter::ID = 0;
-inline void addPass(PassManager &PM, Pass *P) {
+inline void addPass(PassManagerBase &PM, Pass *P) {
   // Add the pass to the pass manager...
   PM.add(P);
 
@@ -281,7 +281,7 @@ inline void addPass(PassManager &PM, Pass *P) {
 /// duplicates llvm-gcc behaviour.
 ///
 /// OptLevel - Optimization Level
-void AddOptimizationPasses(PassManager &MPM, FunctionPassManager &FPM,
+void AddOptimizationPasses(PassManagerBase &MPM, PassManagerBase &FPM,
                            unsigned OptLevel) {
   createStandardFunctionPasses(&FPM, OptLevel);
 
@@ -305,7 +305,7 @@ void AddOptimizationPasses(PassManager &MPM, FunctionPassManager &FPM,
                              InliningPass);
 }
 
-void AddStandardCompilePasses(PassManager &PM) {
+void AddStandardCompilePasses(PassManagerBase &PM) {
   PM.add(createVerifierPass());                  // Verify that input is correct
 
   addPass(PM, createLowerSetJmpPass());          // Lower llvm.setjmp/.longjmp
@@ -328,7 +328,7 @@ void AddStandardCompilePasses(PassManager &PM) {
                              InliningPass);
 }
 
-void AddStandardLinkPasses(PassManager &PM) {
+void AddStandardLinkPasses(PassManagerBase &PM) {
   PM.add(createVerifierPass());                  // Verify that input is correct
 
   // If the -strip-debug command line option was specified, do it.
@@ -422,9 +422,9 @@ int main(int argc, char **argv) {
   if (TD)
     Passes.add(TD);
 
-  OwningPtr<FunctionPassManager> FPasses;
+  OwningPtr<PassManager> FPasses;
   if (OptLevelO1 || OptLevelO2 || OptLevelO3) {
-    FPasses.reset(new FunctionPassManager(M.get()));
+    FPasses.reset(new PassManager());
     if (TD)
       FPasses->add(new TargetData(*TD));
   }
@@ -521,12 +521,8 @@ int main(int argc, char **argv) {
   if (OptLevelO3)
     AddOptimizationPasses(Passes, *FPasses, 3);
 
-  if (OptLevelO1 || OptLevelO2 || OptLevelO3) {
-    FPasses->doInitialization();
-    for (Module::iterator I = M.get()->begin(), E = M.get()->end();
-         I != E; ++I)
-      FPasses->run(*I);
-  }
+  if (OptLevelO1 || OptLevelO2 || OptLevelO3)
+    FPasses->run(*M.get());
 
   // Check that the module is well formed on completion of optimization
   if (!NoVerify && !VerifyEach)
diff --git a/unittests/ADT/StringRefTest.cpp b/unittests/ADT/StringRefTest.cpp
index b0dcb0a..887ba5d 100644
--- a/unittests/ADT/StringRefTest.cpp
+++ b/unittests/ADT/StringRefTest.cpp
@@ -53,6 +53,17 @@ TEST(StringRefTest, StringOps) {
   EXPECT_EQ( 1, StringRef("aab").compare("aaa"));
   EXPECT_EQ(-1, StringRef("aab").compare("aabb"));
   EXPECT_EQ( 1, StringRef("aab").compare("aa"));
+
+  EXPECT_EQ(-1, StringRef("aab").compare_numeric("aad"));
+  EXPECT_EQ( 0, StringRef("aab").compare_numeric("aab"));
+  EXPECT_EQ( 1, StringRef("aab").compare_numeric("aaa"));
+  EXPECT_EQ(-1, StringRef("aab").compare_numeric("aabb"));
+  EXPECT_EQ( 1, StringRef("aab").compare_numeric("aa"));
+  EXPECT_EQ(-1, StringRef("1").compare_numeric("10"));
+  EXPECT_EQ( 0, StringRef("10").compare_numeric("10"));
+  EXPECT_EQ( 0, StringRef("10a").compare_numeric("10a"));
+  EXPECT_EQ( 1, StringRef("2").compare_numeric("1"));
+  EXPECT_EQ( 0, StringRef("llvm_v1i64_ty").compare_numeric("llvm_v1i64_ty"));
 }
 
 TEST(StringRefTest, Operators) {
diff --git a/unittests/ADT/ilistTest.cpp b/unittests/ADT/ilistTest.cpp
new file mode 100644
index 0000000..09a699a
--- /dev/null
+++ b/unittests/ADT/ilistTest.cpp
@@ -0,0 +1,44 @@
+//===- llvm/unittest/ADT/APInt.cpp - APInt unit tests ---------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include <ostream>
+#include "gtest/gtest.h"
+#include "llvm/ADT/ilist.h"
+#include "llvm/ADT/ilist_node.h"
+
+using namespace llvm;
+
+namespace {
+
+struct Node : ilist_node<Node> {
+  int Value;
+
+  Node() {}
+  Node(int _Value) : Value(_Value) {}
+};
+
+TEST(ilistTest, Basic) {
+  ilist<Node> List;
+  List.push_back(Node(1));
+  EXPECT_EQ(1, List.back().Value);
+  EXPECT_EQ(0, List.back().getPrevNode());
+  EXPECT_EQ(0, List.back().getNextNode());
+
+  List.push_back(Node(2));
+  EXPECT_EQ(2, List.back().Value);
+  EXPECT_EQ(2, List.front().getNextNode()->Value);
+  EXPECT_EQ(1, List.back().getPrevNode()->Value);
+
+  const ilist<Node> &ConstList = List;
+  EXPECT_EQ(2, ConstList.back().Value);
+  EXPECT_EQ(2, ConstList.front().getNextNode()->Value);
+  EXPECT_EQ(1, ConstList.back().getPrevNode()->Value);
+}
+
+}
diff --git a/utils/TableGen/AsmMatcherEmitter.cpp b/utils/TableGen/AsmMatcherEmitter.cpp
index 1947824..4ba3df1 100644
--- a/utils/TableGen/AsmMatcherEmitter.cpp
+++ b/utils/TableGen/AsmMatcherEmitter.cpp
@@ -388,6 +388,9 @@ public:
 
   /// operator< - Compare two classes.
   bool operator<(const ClassInfo &RHS) const {
+    if (this == &RHS)
+      return false;
+
     // Unrelated classes can be ordered by kind.
     if (!isRelatedTo(RHS))
       return Kind < RHS.Kind;
@@ -403,7 +406,13 @@ public:
 
     default:
       // This class preceeds the RHS if it is a proper subset of the RHS.
-      return this != &RHS && isSubsetOf(RHS);
+      if (isSubsetOf(RHS))
+	return true;
+      if (RHS.isSubsetOf(*this))
+	return false;
+
+      // Otherwise, order by name to ensure we have a total ordering.
+      return ValueName < RHS.ValueName;
     }
   }
 };
@@ -794,15 +803,19 @@ void AsmMatcherInfo::BuildOperandClasses(CodeGenTarget &Target) {
     ClassInfo *CI = AsmOperandClasses[*it];
     CI->Kind = ClassInfo::UserClass0 + Index;
 
-    Init *Super = (*it)->getValueInit("SuperClass");
-    if (DefInit *DI = dynamic_cast<DefInit*>(Super)) {
+    ListInit *Supers = (*it)->getValueAsListInit("SuperClasses");
+    for (unsigned i = 0, e = Supers->getSize(); i != e; ++i) {
+      DefInit *DI = dynamic_cast<DefInit*>(Supers->getElement(i));
+      if (!DI) {
+        PrintError((*it)->getLoc(), "Invalid super class reference!");
+        continue;
+      }
+
       ClassInfo *SC = AsmOperandClasses[DI->getDef()];
       if (!SC)
         PrintError((*it)->getLoc(), "Invalid super class reference!");
       else
         CI->SuperClasses.push_back(SC);
-    } else {
-      assert(dynamic_cast<UnsetInit*>(Super) && "Unexpected SuperClass field!");
     }
     CI->ClassName = (*it)->getValueAsString("Name");
     CI->Name = "MCK_" + CI->ClassName;
diff --git a/utils/TableGen/CMakeLists.txt b/utils/TableGen/CMakeLists.txt
index f68159a..731cde9 100644
--- a/utils/TableGen/CMakeLists.txt
+++ b/utils/TableGen/CMakeLists.txt
@@ -4,6 +4,7 @@ add_executable(tblgen
   AsmWriterEmitter.cpp
   AsmWriterInst.cpp
   CallingConvEmitter.cpp
+  ClangASTNodesEmitter.cpp
   ClangDiagnosticsEmitter.cpp
   CodeEmitterGen.cpp
   CodeGenDAGPatterns.cpp
diff --git a/utils/TableGen/ClangASTNodesEmitter.cpp b/utils/TableGen/ClangASTNodesEmitter.cpp
new file mode 100644
index 0000000..5d6423d
--- /dev/null
+++ b/utils/TableGen/ClangASTNodesEmitter.cpp
@@ -0,0 +1,141 @@
+//=== ClangASTNodesEmitter.cpp - Generate Clang AST node tables -*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// These tablegen backends emit Clang AST node tables
+//
+//===----------------------------------------------------------------------===//
+
+#include "ClangASTNodesEmitter.h"
+#include "Record.h"
+#include <map>
+#include <cctype>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Statement Node Tables (.inc file) generation.
+//===----------------------------------------------------------------------===//
+
+// Create a macro-ized version of a name
+static std::string macroName(std::string S) {
+  for (unsigned i = 0; i < S.size(); ++i)
+    S[i] = std::toupper(S[i]);
+
+  return S;
+}
+
+// A map from a node to each of its derived nodes.
+typedef std::multimap<Record*, Record*> ChildMap;
+typedef ChildMap::const_iterator ChildIterator;
+
+// Returns the first and last non-abstract subrecords
+// Called recursively to ensure that nodes remain contiguous
+static std::pair<Record *, Record *> EmitStmtNode(const ChildMap &Tree,
+                                                  raw_ostream &OS,
+                                                  Record *Base,
+						  bool Root = true) {
+  std::string BaseName = macroName(Base->getName());
+
+  ChildIterator i = Tree.lower_bound(Base), e = Tree.upper_bound(Base);
+
+  Record *First = 0, *Last = 0;
+  // This might be the pseudo-node for Stmt; don't assume it has an Abstract
+  // bit
+  if (Base->getValue("Abstract") && !Base->getValueAsBit("Abstract"))
+    First = Last = Base;
+
+  for (; i != e; ++i) {
+    Record *R = i->second;
+    bool Abstract = R->getValueAsBit("Abstract");
+    std::string NodeName = macroName(R->getName());
+
+    OS << "#ifndef " << NodeName << "\n";
+    OS << "#  define " << NodeName << "(Type, Base) "
+        << BaseName << "(Type, Base)\n";
+    OS << "#endif\n";
+
+    if (Abstract)
+      OS << "ABSTRACT_STMT(" << NodeName << "(" << R->getName() << ", "
+          << Base->getName() << "))\n";
+    else
+      OS << NodeName << "(" << R->getName() << ", "
+          << Base->getName() << ")\n";
+
+    if (Tree.find(R) != Tree.end()) {
+      const std::pair<Record *, Record *> &Result
+        = EmitStmtNode(Tree, OS, R, false);
+      if (!First && Result.first)
+        First = Result.first;
+      if (Result.second)
+        Last = Result.second;
+    } else {
+      if (!Abstract) {
+        Last = R;
+
+        if (!First)
+          First = R;
+      }
+    }
+
+    OS << "#undef " << NodeName << "\n\n";
+  }
+
+  if (First) {
+    assert (Last && "Got a first node but not a last node for a range!");
+    if (Root)
+      OS << "LAST_STMT_RANGE(";
+    else
+      OS << "STMT_RANGE(";
+ 
+    OS << Base->getName() << ", " << First->getName() << ", "
+       << Last->getName() << ")\n\n";
+  }
+
+  return std::make_pair(First, Last);
+}
+
+void ClangStmtNodesEmitter::run(raw_ostream &OS) {
+  // Write the preamble
+  OS << "#ifndef ABSTRACT_STMT\n";
+  OS << "#  define ABSTRACT_STMT(Stmt) Stmt\n";
+  OS << "#endif\n";
+
+  OS << "#ifndef STMT_RANGE\n";
+  OS << "#  define STMT_RANGE(Base, First, Last)\n";
+  OS << "#endif\n\n";
+
+  OS << "#ifndef LAST_STMT_RANGE\n";
+  OS << "#  define LAST_STMT_RANGE(Base, First, Last) "
+          "STMT_RANGE(Base, First, Last)\n";
+  OS << "#endif\n\n";
+ 
+  // Emit statements
+  const std::vector<Record*> Stmts = Records.getAllDerivedDefinitions("Stmt");
+
+  ChildMap Tree;
+
+  // Create a pseudo-record to serve as the Stmt node, which isn't actually
+  // output.
+  Record Stmt ("Stmt", SMLoc());
+
+  for (unsigned i = 0, e = Stmts.size(); i != e; ++i) {
+    Record *R = Stmts[i];
+
+    if (R->getValue("Base"))
+      Tree.insert(std::make_pair(R->getValueAsDef("Base"), R));
+    else
+      Tree.insert(std::make_pair(&Stmt, R));
+  }
+
+  EmitStmtNode(Tree, OS, &Stmt);
+
+  OS << "#undef STMT\n";
+  OS << "#undef STMT_RANGE\n";
+  OS << "#undef LAST_STMT_RANGE\n";
+  OS << "#undef ABSTRACT_STMT\n";
+}
diff --git a/utils/TableGen/ClangASTNodesEmitter.h b/utils/TableGen/ClangASTNodesEmitter.h
new file mode 100644
index 0000000..c4ce9fa
--- /dev/null
+++ b/utils/TableGen/ClangASTNodesEmitter.h
@@ -0,0 +1,36 @@
+//===- ClangASTNodesEmitter.h - Generate Clang AST node tables -*- C++ -*--===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// These tablegen backends emit Clang AST node tables
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CLANGAST_EMITTER_H
+#define CLANGAST_EMITTER_H
+
+#include "TableGenBackend.h"
+
+namespace llvm {
+
+/// ClangStmtNodesEmitter - The top-level class emits .def files containing
+///  declarations of Clang statements.
+///
+class ClangStmtNodesEmitter : public TableGenBackend {
+  RecordKeeper &Records;
+public:
+  explicit ClangStmtNodesEmitter(RecordKeeper &R)
+    : Records(R) {}
+
+  // run - Output the .def file contents
+  void run(raw_ostream &OS);
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/utils/TableGen/ClangDiagnosticsEmitter.cpp b/utils/TableGen/ClangDiagnosticsEmitter.cpp
index d0e813b..75b6252 100644
--- a/utils/TableGen/ClangDiagnosticsEmitter.cpp
+++ b/utils/TableGen/ClangDiagnosticsEmitter.cpp
@@ -70,15 +70,16 @@ getCategoryFromDiagGroup(const Record *Group,
 /// lives in.
 static std::string getDiagnosticCategory(const Record *R,
                                          DiagGroupParentMap &DiagGroupParents) {
-  // If the diagnostic itself has a category, get it.
-  std::string CatName = R->getValueAsString("CategoryName");
-  if (!CatName.empty()) return CatName;
-  
-  DefInit *Group = dynamic_cast<DefInit*>(R->getValueInit("Group"));
-  if (Group == 0) return "";
+  // If the diagnostic is in a group, and that group has a category, use it.
+  if (DefInit *Group = dynamic_cast<DefInit*>(R->getValueInit("Group"))) {
+    // Check the diagnostic's diag group for a category.
+    std::string CatName = getCategoryFromDiagGroup(Group->getDef(),
+                                                   DiagGroupParents);
+    if (!CatName.empty()) return CatName;
+  }
   
-  // Check the diagnostic's diag group for a category.
-  return getCategoryFromDiagGroup(Group->getDef(), DiagGroupParents);
+  // If the diagnostic itself has a category, get it.
+  return R->getValueAsString("CategoryName");
 }
 
 namespace {
@@ -239,7 +240,7 @@ void ClangDiagGroupsEmitter::run(raw_ostream &OS) {
     
     const std::vector<std::string> &SubGroups = I->second.SubGroups;
     if (!SubGroups.empty()) {
-      OS << "static const char DiagSubGroup" << I->second.IDNo << "[] = { ";
+      OS << "static const short DiagSubGroup" << I->second.IDNo << "[] = { ";
       for (unsigned i = 0, e = SubGroups.size(); i != e; ++i) {
         std::map<std::string, GroupInfo>::iterator RI =
           DiagsInGroup.find(SubGroups[i]);
diff --git a/utils/TableGen/CodeGenDAGPatterns.cpp b/utils/TableGen/CodeGenDAGPatterns.cpp
index a0bccfc..878ed09 100644
--- a/utils/TableGen/CodeGenDAGPatterns.cpp
+++ b/utils/TableGen/CodeGenDAGPatterns.cpp
@@ -1057,6 +1057,11 @@ static EEVT::TypeSet getImplicitType(Record *R, unsigned ResNo,
     const CodeGenTarget &T = TP.getDAGPatterns().getTargetInfo();
     return EEVT::TypeSet(T.getRegisterVTs(R));
   }
+
+  if (R->isSubClassOf("SubRegIndex")) {
+    assert(ResNo == 0 && "SubRegisterIndices only produce one result!");
+    return EEVT::TypeSet();
+  }
   
   if (R->isSubClassOf("ValueType") || R->isSubClassOf("CondCode")) {
     assert(ResNo == 0 && "This node only has one result!");
diff --git a/utils/TableGen/CodeGenRegisters.h b/utils/TableGen/CodeGenRegisters.h
index 6f8682b..344f77f 100644
--- a/utils/TableGen/CodeGenRegisters.h
+++ b/utils/TableGen/CodeGenRegisters.h
@@ -16,6 +16,7 @@
 #define CODEGEN_REGISTERS_H
 
 #include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/ADT/DenseMap.h"
 #include <string>
 #include <vector>
 #include <cstdlib>
@@ -40,7 +41,8 @@ namespace llvm {
     unsigned SpillSize;
     unsigned SpillAlignment;
     int CopyCost;
-    std::vector<Record*> SubRegClasses;
+    // Map SubRegIndex -> RegisterClass
+    DenseMap<Record*,Record*> SubRegClasses;
     std::string MethodProtos, MethodBodies;
 
     const std::string &getName() const;
diff --git a/utils/TableGen/CodeGenTarget.cpp b/utils/TableGen/CodeGenTarget.cpp
index 1efe2ff..3797992 100644
--- a/utils/TableGen/CodeGenTarget.cpp
+++ b/utils/TableGen/CodeGenTarget.cpp
@@ -80,6 +80,7 @@ std::string llvm::getEnumName(MVT::SimpleValueType T) {
   case MVT::v1i64: return "MVT::v1i64";
   case MVT::v2i64: return "MVT::v2i64";
   case MVT::v4i64: return "MVT::v4i64";
+  case MVT::v8i64: return "MVT::v8i64";
   case MVT::v2f32: return "MVT::v2f32";
   case MVT::v4f32: return "MVT::v4f32";
   case MVT::v8f32: return "MVT::v8f32";
@@ -158,6 +159,7 @@ void CodeGenTarget::ReadRegisters() const {
   std::vector<Record*> Regs = Records.getAllDerivedDefinitions("Register");
   if (Regs.empty())
     throw std::string("No 'Register' subclasses defined!");
+  std::sort(Regs.begin(), Regs.end(), LessRecord());
 
   Registers.reserve(Regs.size());
   Registers.assign(Regs.begin(), Regs.end());
@@ -172,6 +174,11 @@ const std::string &CodeGenRegister::getName() const {
   return TheDef->getName();
 }
 
+void CodeGenTarget::ReadSubRegIndices() const {
+  SubRegIndices = Records.getAllDerivedDefinitions("SubRegIndex");
+  std::sort(SubRegIndices.begin(), SubRegIndices.end(), LessRecord());
+}
+
 void CodeGenTarget::ReadRegisterClasses() const {
   std::vector<Record*> RegClasses =
     Records.getAllDerivedDefinitions("RegisterClass");
@@ -228,17 +235,30 @@ CodeGenRegisterClass::CodeGenRegisterClass(Record *R) : TheDef(R) {
             "' does not derive from the Register class!";
     Elements.push_back(Reg);
   }
-  
-  std::vector<Record*> SubRegClassList = 
-                        R->getValueAsListOfDefs("SubRegClassList");
-  for (unsigned i = 0, e = SubRegClassList.size(); i != e; ++i) {
-    Record *SubRegClass = SubRegClassList[i];
-    if (!SubRegClass->isSubClassOf("RegisterClass"))
-      throw "Register Class member '" + SubRegClass->getName() +
-            "' does not derive from the RegisterClass class!";
-    SubRegClasses.push_back(SubRegClass);
-  }  
-  
+
+  // SubRegClasses is a list<dag> containing (RC, subregindex, ...) dags.
+  ListInit *SRC = R->getValueAsListInit("SubRegClasses");
+  for (ListInit::const_iterator i = SRC->begin(), e = SRC->end(); i != e; ++i) {
+    DagInit *DAG = dynamic_cast<DagInit*>(*i);
+    if (!DAG) throw "SubRegClasses must contain DAGs";
+    DefInit *DAGOp = dynamic_cast<DefInit*>(DAG->getOperator());
+    Record *RCRec;
+    if (!DAGOp || !(RCRec = DAGOp->getDef())->isSubClassOf("RegisterClass"))
+      throw "Operator '" + DAG->getOperator()->getAsString() +
+        "' in SubRegClasses is not a RegisterClass";
+    // Iterate over args, all SubRegIndex instances.
+    for (DagInit::const_arg_iterator ai = DAG->arg_begin(), ae = DAG->arg_end();
+         ai != ae; ++ai) {
+      DefInit *Idx = dynamic_cast<DefInit*>(*ai);
+      Record *IdxRec;
+      if (!Idx || !(IdxRec = Idx->getDef())->isSubClassOf("SubRegIndex"))
+        throw "Argument '" + (*ai)->getAsString() +
+          "' in SubRegClasses is not a SubRegIndex";
+      if (!SubRegClasses.insert(std::make_pair(IdxRec, RCRec)).second)
+        throw "SubRegIndex '" + IdxRec->getName() + "' mentioned twice";
+    }
+  }
+
   // Allow targets to override the size in bits of the RegisterClass.
   unsigned Size = R->getValueAsInt("Size");
 
diff --git a/utils/TableGen/CodeGenTarget.h b/utils/TableGen/CodeGenTarget.h
index 2926418..6b06b66 100644
--- a/utils/TableGen/CodeGenTarget.h
+++ b/utils/TableGen/CodeGenTarget.h
@@ -19,14 +19,12 @@
 
 #include "CodeGenRegisters.h"
 #include "CodeGenInstruction.h"
+#include "Record.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/DenseMap.h"
 #include <algorithm>
 
 namespace llvm {
 
-class Record;
-class RecordKeeper;
 struct CodeGenRegister;
 class CodeGenTarget;
 
@@ -65,9 +63,11 @@ class CodeGenTarget {
 
   mutable DenseMap<const Record*, CodeGenInstruction*> Instructions;
   mutable std::vector<CodeGenRegister> Registers;
+  mutable std::vector<Record*> SubRegIndices;
   mutable std::vector<CodeGenRegisterClass> RegisterClasses;
   mutable std::vector<MVT::SimpleValueType> LegalValueTypes;
   void ReadRegisters() const;
+  void ReadSubRegIndices() const;
   void ReadRegisterClasses() const;
   void ReadInstructions() const;
   void ReadLegalValueTypes() const;
@@ -100,11 +100,25 @@ public:
     return Registers;
   }
 
+  const std::vector<Record*> &getSubRegIndices() const {
+    if (SubRegIndices.empty()) ReadSubRegIndices();
+    return SubRegIndices;
+  }
+
+  // Map a SubRegIndex Record to its number.
+  unsigned getSubRegIndexNo(Record *idx) const {
+    if (SubRegIndices.empty()) ReadSubRegIndices();
+    std::vector<Record*>::const_iterator i =
+      std::find(SubRegIndices.begin(), SubRegIndices.end(), idx);
+    assert(i != SubRegIndices.end() && "Not a SubRegIndex");
+    return (i - SubRegIndices.begin()) + 1;
+  }
+
   const std::vector<CodeGenRegisterClass> &getRegisterClasses() const {
     if (RegisterClasses.empty()) ReadRegisterClasses();
     return RegisterClasses;
   }
-  
+
   const CodeGenRegisterClass &getRegisterClass(Record *R) const {
     const std::vector<CodeGenRegisterClass> &RC = getRegisterClasses();
     for (unsigned i = 0, e = RC.size(); i != e; ++i)
diff --git a/utils/TableGen/DAGISelMatcherGen.cpp b/utils/TableGen/DAGISelMatcherGen.cpp
index 9d469a9..eb528eb 100644
--- a/utils/TableGen/DAGISelMatcherGen.cpp
+++ b/utils/TableGen/DAGISelMatcherGen.cpp
@@ -224,6 +224,7 @@ void MatcherGen::EmitLeafMatchCode(const TreePatternNode *N) {
   if (// Handle register references.  Nothing to do here, they always match.
       LeafRec->isSubClassOf("RegisterClass") || 
       LeafRec->isSubClassOf("PointerLikeRegClass") ||
+      LeafRec->isSubClassOf("SubRegIndex") ||
       // Place holder for SRCVALUE nodes. Nothing to do here.
       LeafRec->getName() == "srcvalue")
     return;
@@ -597,6 +598,14 @@ void MatcherGen::EmitResultLeafAsOperand(const TreePatternNode *N,
       ResultOps.push_back(NextRecordedOperandNo++);
       return;
     }
+
+    // Handle a subregister index. This is used for INSERT_SUBREG etc.
+    if (DI->getDef()->isSubClassOf("SubRegIndex")) {
+      std::string Value = getQualifiedName(DI->getDef());
+      AddMatcher(new EmitStringIntegerMatcher(Value, MVT::i32));
+      ResultOps.push_back(NextRecordedOperandNo++);
+      return;
+    }
   }
   
   errs() << "unhandled leaf node: \n";
diff --git a/utils/TableGen/EDEmitter.cpp b/utils/TableGen/EDEmitter.cpp
index d3bf60e..0d5ee40 100644
--- a/utils/TableGen/EDEmitter.cpp
+++ b/utils/TableGen/EDEmitter.cpp
@@ -309,8 +309,7 @@ static int X86TypeFromOpName(LiteralConstantEmitter *type,
   REG("RST");
   REG("SEGMENT_REG");
   REG("DEBUG_REG");
-  REG("CONTROL_REG_32");
-  REG("CONTROL_REG_64");
+  REG("CONTROL_REG");
   
   IMM("i8imm");
   IMM("i16imm");
@@ -575,10 +574,12 @@ static int ARMFlagFromOpName(LiteralConstantEmitter *type,
   REG("s_cc_out");
   REG("tGPR");
   REG("DPR");
-  REG("SPR");
-  REG("QPR");
   REG("DPR_VFP2");
   REG("DPR_8");
+  REG("SPR");
+  REG("QPR");
+  REG("QQPR");
+  REG("QQQQPR");
   
   IMM("i32imm");
   IMM("bf_inv_mask_imm");
diff --git a/utils/TableGen/FastISelEmitter.cpp b/utils/TableGen/FastISelEmitter.cpp
index ba59e50..9ec9e08 100644
--- a/utils/TableGen/FastISelEmitter.cpp
+++ b/utils/TableGen/FastISelEmitter.cpp
@@ -31,7 +31,7 @@ namespace {
 struct InstructionMemo {
   std::string Name;
   const CodeGenRegisterClass *RC;
-  unsigned char SubRegNo;
+  std::string SubRegNo;
   std::vector<std::string>* PhysRegs;
 };
 
@@ -123,7 +123,7 @@ struct OperandsSignature {
   void PrintParameters(raw_ostream &OS) const {
     for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
       if (Operands[i] == "r") {
-        OS << "unsigned Op" << i;
+        OS << "unsigned Op" << i << ", bool Op" << i << "IsKill";
       } else if (Operands[i] == "i") {
         OS << "uint64_t imm" << i;
       } else if (Operands[i] == "f") {
@@ -149,7 +149,7 @@ struct OperandsSignature {
       if (PrintedArg)
         OS << ", ";
       if (Operands[i] == "r") {
-        OS << "Op" << i;
+        OS << "Op" << i << ", Op" << i << "IsKill";
         PrintedArg = true;
       } else if (Operands[i] == "i") {
         OS << "imm" << i;
@@ -167,7 +167,7 @@ struct OperandsSignature {
   void PrintArguments(raw_ostream &OS) const {
     for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
       if (Operands[i] == "r") {
-        OS << "Op" << i;
+        OS << "Op" << i << ", Op" << i << "IsKill";
       } else if (Operands[i] == "i") {
         OS << "imm" << i;
       } else if (Operands[i] == "f") {
@@ -278,7 +278,7 @@ void FastISelMap::CollectPatterns(CodeGenDAGPatterns &CGP) {
     // For now, ignore instructions where the first operand is not an
     // output register.
     const CodeGenRegisterClass *DstRC = 0;
-    unsigned SubRegNo = ~0;
+    std::string SubRegNo;
     if (Op->getName() != "EXTRACT_SUBREG") {
       Record *Op0Rec = II.OperandList[0].Rec;
       if (!Op0Rec->isSubClassOf("RegisterClass"))
@@ -287,8 +287,11 @@ void FastISelMap::CollectPatterns(CodeGenDAGPatterns &CGP) {
       if (!DstRC)
         continue;
     } else {
-      SubRegNo = static_cast<IntInit*>(
-                 Dst->getChild(1)->getLeafValue())->getValue();
+      DefInit *SR = dynamic_cast<DefInit*>(Dst->getChild(1)->getLeafValue());
+      if (SR)
+        SubRegNo = getQualifiedName(SR->getDef());
+      else
+        SubRegNo = Dst->getChild(1)->getLeafValue()->getAsString();
     }
 
     // Inspect the pattern.
@@ -433,11 +436,11 @@ void FastISelMap::PrintFunctionDefinitions(raw_ostream &OS) {
                      << (*Memo.PhysRegs)[i] << ", Op" << i << ", "
                      << "TM.getRegisterInfo()->getPhysicalRegisterRegClass("
                      << (*Memo.PhysRegs)[i] << "), "
-                     << "MRI.getRegClass(Op" << i << "));\n";
+                     << "MRI.getRegClass(Op" << i << "), DL);\n";
               }
               
               OS << "  return FastEmitInst_";
-              if (Memo.SubRegNo == (unsigned char)~0) {
+              if (Memo.SubRegNo.empty()) {
                 Operands.PrintManglingSuffix(OS, *Memo.PhysRegs);
                 OS << "(" << InstNS << Memo.Name << ", ";
                 OS << InstNS << Memo.RC->getName() << "RegisterClass";
@@ -447,8 +450,8 @@ void FastISelMap::PrintFunctionDefinitions(raw_ostream &OS) {
                 OS << ");\n";
               } else {
                 OS << "extractsubreg(" << getName(RetVT);
-                OS << ", Op0, ";
-                OS << (unsigned)Memo.SubRegNo;
+                OS << ", Op0, Op0IsKill, ";
+                OS << Memo.SubRegNo;
                 OS << ");\n";
               }
               
@@ -527,12 +530,12 @@ void FastISelMap::PrintFunctionDefinitions(raw_ostream &OS) {
                      << (*Memo.PhysRegs)[i] << ", Op" << i << ", "
                      << "TM.getRegisterInfo()->getPhysicalRegisterRegClass("
                      << (*Memo.PhysRegs)[i] << "), "
-                     << "MRI.getRegClass(Op" << i << "));\n";
+                     << "MRI.getRegClass(Op" << i << "), DL);\n";
               }
             
             OS << "  return FastEmitInst_";
             
-            if (Memo.SubRegNo == (unsigned char)~0) {
+            if (Memo.SubRegNo.empty()) {
               Operands.PrintManglingSuffix(OS, *Memo.PhysRegs);
               OS << "(" << InstNS << Memo.Name << ", ";
               OS << InstNS << Memo.RC->getName() << "RegisterClass";
@@ -541,8 +544,8 @@ void FastISelMap::PrintFunctionDefinitions(raw_ostream &OS) {
               Operands.PrintArguments(OS, *Memo.PhysRegs);
               OS << ");\n";
             } else {
-              OS << "extractsubreg(RetVT, Op0, ";
-              OS << (unsigned)Memo.SubRegNo;
+              OS << "extractsubreg(RetVT, Op0, Op0IsKill, ";
+              OS << Memo.SubRegNo;
               OS << ");\n";
             }
             
diff --git a/utils/TableGen/IntrinsicEmitter.cpp b/utils/TableGen/IntrinsicEmitter.cpp
index 417aca7..d7a9051 100644
--- a/utils/TableGen/IntrinsicEmitter.cpp
+++ b/utils/TableGen/IntrinsicEmitter.cpp
@@ -30,6 +30,8 @@ void IntrinsicEmitter::run(raw_ostream &OS) {
   if (TargetOnly && !Ints.empty())
     TargetPrefix = Ints[0].TargetPrefix;
 
+  EmitPrefix(OS);
+
   // Emit the enum information.
   EmitEnumInfo(Ints, OS);
 
@@ -59,6 +61,23 @@ void IntrinsicEmitter::run(raw_ostream &OS) {
 
   // Emit code to translate GCC builtins into LLVM intrinsics.
   EmitIntrinsicToGCCBuiltinMap(Ints, OS);
+
+  EmitSuffix(OS);
+}
+
+void IntrinsicEmitter::EmitPrefix(raw_ostream &OS) {
+  OS << "// VisualStudio defines setjmp as _setjmp\n"
+        "#if defined(_MSC_VER) && defined(setjmp)\n"
+        "#define setjmp_undefined_for_visual_studio\n"
+        "#undef setjmp\n"
+        "#endif\n\n";
+}
+
+void IntrinsicEmitter::EmitSuffix(raw_ostream &OS) {
+  OS << "#if defined(_MSC_VER) && defined(setjmp_undefined_for_visual_studio)\n"
+        "// let's return it to _setjmp state\n"
+        "#define setjmp _setjmp\n"
+        "#endif\n\n";
 }
 
 void IntrinsicEmitter::EmitEnumInfo(const std::vector<CodeGenIntrinsic> &Ints,
diff --git a/utils/TableGen/IntrinsicEmitter.h b/utils/TableGen/IntrinsicEmitter.h
index c3c92bc..b1efecb 100644
--- a/utils/TableGen/IntrinsicEmitter.h
+++ b/utils/TableGen/IntrinsicEmitter.h
@@ -28,6 +28,8 @@ namespace llvm {
       : Records(R), TargetOnly(T) {}
 
     void run(raw_ostream &OS);
+
+    void EmitPrefix(raw_ostream &OS);
     
     void EmitEnumInfo(const std::vector<CodeGenIntrinsic> &Ints, 
                       raw_ostream &OS);
@@ -50,6 +52,7 @@ namespace llvm {
                             raw_ostream &OS);
     void EmitIntrinsicToGCCBuiltinMap(const std::vector<CodeGenIntrinsic> &Ints, 
                                       raw_ostream &OS);
+    void EmitSuffix(raw_ostream &OS);
   };
 
 } // End llvm namespace
diff --git a/utils/TableGen/Makefile b/utils/TableGen/Makefile
index 7ea88de..f27cd99 100644
--- a/utils/TableGen/Makefile
+++ b/utils/TableGen/Makefile
@@ -1,10 +1,10 @@
 ##===- utils/TableGen/Makefile -----------------------------*- Makefile -*-===##
-# 
+#
 #                     The LLVM Compiler Infrastructure
 #
 # This file is distributed under the University of Illinois Open Source
 # License. See LICENSE.TXT for details.
-# 
+#
 ##===----------------------------------------------------------------------===##
 
 LEVEL = ../..
diff --git a/utils/TableGen/Record.h b/utils/TableGen/Record.h
index 576d626..8f9fd95 100644
--- a/utils/TableGen/Record.h
+++ b/utils/TableGen/Record.h
@@ -1461,7 +1461,7 @@ public:
 ///
 struct LessRecord {
   bool operator()(const Record *Rec1, const Record *Rec2) const {
-    return Rec1->getName() < Rec2->getName();
+    return StringRef(Rec1->getName()).compare_numeric(Rec2->getName()) < 0;
   }
 };
 
diff --git a/utils/TableGen/RegisterInfoEmitter.cpp b/utils/TableGen/RegisterInfoEmitter.cpp
index fcf4123..c99bbd9 100644
--- a/utils/TableGen/RegisterInfoEmitter.cpp
+++ b/utils/TableGen/RegisterInfoEmitter.cpp
@@ -35,14 +35,29 @@ void RegisterInfoEmitter::runEnums(raw_ostream &OS) {
 
   if (!Namespace.empty())
     OS << "namespace " << Namespace << " {\n";
-  OS << "  enum {\n    NoRegister,\n";
+  OS << "enum {\n  NoRegister,\n";
 
   for (unsigned i = 0, e = Registers.size(); i != e; ++i)
-    OS << "    " << Registers[i].getName() << ", \t// " << i+1 << "\n";
-  OS << "    NUM_TARGET_REGS \t// " << Registers.size()+1 << "\n";
-  OS << "  };\n";
+    OS << "  " << Registers[i].getName() << ", \t// " << i+1 << "\n";
+  OS << "  NUM_TARGET_REGS \t// " << Registers.size()+1 << "\n";
+  OS << "};\n";
   if (!Namespace.empty())
     OS << "}\n";
+
+  const std::vector<Record*> SubRegIndices = Target.getSubRegIndices();
+  if (!SubRegIndices.empty()) {
+    OS << "\n// Subregister indices\n";
+    Namespace = SubRegIndices[0]->getValueAsString("Namespace");
+    if (!Namespace.empty())
+      OS << "namespace " << Namespace << " {\n";
+    OS << "enum {\n  NoSubRegister,\n";
+    for (unsigned i = 0, e = SubRegIndices.size(); i != e; ++i)
+      OS << "  " << SubRegIndices[i]->getName() << ",\t// " << i+1 << "\n";
+    OS << "  NUM_TARGET_SUBREGS = " << SubRegIndices.size()+1 << "\n";
+    OS << "};\n";
+    if (!Namespace.empty())
+      OS << "}\n";
+  }
   OS << "} // End llvm namespace \n";
 }
 
@@ -156,6 +171,90 @@ static void addSubSuperReg(Record *R, Record *S,
       addSubSuperReg(R, *I, SubRegs, SuperRegs, Aliases);
 }
 
+// Map SubRegIndex -> Register
+typedef std::map<Record*, Record*, LessRecord> SubRegMap;
+// Map Register -> SubRegMap
+typedef std::map<Record*, SubRegMap> AllSubRegMap;
+
+// Calculate all subregindices for Reg. Loopy subregs cause infinite recursion.
+static SubRegMap &inferSubRegIndices(Record *Reg, AllSubRegMap &ASRM) {
+  SubRegMap &SRM = ASRM[Reg];
+  if (!SRM.empty())
+    return SRM;
+  std::vector<Record*> SubRegs = Reg->getValueAsListOfDefs("SubRegs");
+  std::vector<Record*> Indices = Reg->getValueAsListOfDefs("SubRegIndices");
+  if (SubRegs.size() != Indices.size())
+    throw "Register " + Reg->getName() + " SubRegIndices doesn't match SubRegs";
+
+  // First insert the direct subregs and make sure they are fully indexed.
+  for (unsigned i = 0, e = SubRegs.size(); i != e; ++i) {
+    if (!SRM.insert(std::make_pair(Indices[i], SubRegs[i])).second)
+      throw "SubRegIndex " + Indices[i]->getName()
+        + " appears twice in Register " + Reg->getName();
+    inferSubRegIndices(SubRegs[i], ASRM);
+  }
+
+  // Keep track of inherited subregs and how they can be reached.
+  // Register -> (SubRegIndex, SubRegIndex)
+  typedef std::map<Record*, std::pair<Record*,Record*>, LessRecord> OrphanMap;
+  OrphanMap Orphans;
+
+  // Clone inherited subregs. Here the order is important - earlier subregs take
+  // precedence.
+  for (unsigned i = 0, e = SubRegs.size(); i != e; ++i) {
+    SubRegMap &M = ASRM[SubRegs[i]];
+    for (SubRegMap::iterator si = M.begin(), se = M.end(); si != se; ++si)
+      if (!SRM.insert(*si).second)
+        Orphans[si->second] = std::make_pair(Indices[i], si->first);
+  }
+
+  // Finally process the composites.
+  ListInit *Comps = Reg->getValueAsListInit("CompositeIndices");
+  for (unsigned i = 0, e = Comps->size(); i != e; ++i) {
+    DagInit *Pat = dynamic_cast<DagInit*>(Comps->getElement(i));
+    if (!Pat)
+      throw "Invalid dag '" + Comps->getElement(i)->getAsString()
+        + "' in CompositeIndices";
+    DefInit *BaseIdxInit = dynamic_cast<DefInit*>(Pat->getOperator());
+    if (!BaseIdxInit || !BaseIdxInit->getDef()->isSubClassOf("SubRegIndex"))
+      throw "Invalid SubClassIndex in " + Pat->getAsString();
+
+    // Resolve list of subreg indices into R2.
+    Record *R2 = Reg;
+    for (DagInit::const_arg_iterator di = Pat->arg_begin(),
+         de = Pat->arg_end(); di != de; ++di) {
+      DefInit *IdxInit = dynamic_cast<DefInit*>(*di);
+      if (!IdxInit || !IdxInit->getDef()->isSubClassOf("SubRegIndex"))
+        throw "Invalid SubClassIndex in " + Pat->getAsString();
+      SubRegMap::const_iterator ni = ASRM[R2].find(IdxInit->getDef());
+      if (ni == ASRM[R2].end())
+        throw "Composite " + Pat->getAsString() + " refers to bad index in "
+          + R2->getName();
+      R2 = ni->second;
+    }
+
+    // Insert composite index. Allow overriding inherited indices etc.
+    SRM[BaseIdxInit->getDef()] = R2;
+
+    // R2 is now directly addressable, no longer an orphan.
+    Orphans.erase(R2);
+  }
+
+  // Now, Orphans contains the inherited subregisters without a direct index.
+  if (!Orphans.empty()) {
+    errs() << "Error: Register " << getQualifiedName(Reg)
+           << " inherited subregisters without an index:\n";
+    for (OrphanMap::iterator i = Orphans.begin(), e = Orphans.end(); i != e;
+         ++i) {
+      errs() << "  " << getQualifiedName(i->first)
+             << " = " << i->second.first->getName()
+             << ", " << i->second.second->getName() << "\n";
+    }
+    abort();
+  }
+  return SRM;
+}
+
 class RegisterSorter {
 private:
   std::map<Record*, std::set<Record*>, LessRecord> &RegisterSubRegs;
@@ -243,80 +342,82 @@ void RegisterInfoEmitter::run(raw_ostream &OS) {
     std::map<unsigned, std::set<unsigned> > SuperRegClassMap;
     OS << "\n";
 
-    // Emit the sub-register classes for each RegisterClass
-    for (unsigned rc = 0, e = RegisterClasses.size(); rc != e; ++rc) {
-      const CodeGenRegisterClass &RC = RegisterClasses[rc];
+    unsigned NumSubRegIndices = Target.getSubRegIndices().size();
+
+    if (NumSubRegIndices) {
+      // Emit the sub-register classes for each RegisterClass
+      for (unsigned rc = 0, e = RegisterClasses.size(); rc != e; ++rc) {
+        const CodeGenRegisterClass &RC = RegisterClasses[rc];
+        std::vector<Record*> SRC(NumSubRegIndices);
+        for (DenseMap<Record*,Record*>::const_iterator
+             i = RC.SubRegClasses.begin(),
+             e = RC.SubRegClasses.end(); i != e; ++i) {
+          // Build SRC array.
+          unsigned idx = Target.getSubRegIndexNo(i->first);
+          SRC.at(idx-1) = i->second;
+
+          // Find the register class number of i->second for SuperRegClassMap.
+          for (unsigned rc2 = 0, e2 = RegisterClasses.size(); rc2 != e2; ++rc2) {
+            const CodeGenRegisterClass &RC2 =  RegisterClasses[rc2];
+            if (RC2.TheDef == i->second) {
+              SuperRegClassMap[rc2].insert(rc);
+              break;
+            }
+          }
+        }
 
-      // Give the register class a legal C name if it's anonymous.
-      std::string Name = RC.TheDef->getName();
+        // Give the register class a legal C name if it's anonymous.
+        std::string Name = RC.TheDef->getName();
 
-      OS << "  // " << Name
-         << " Sub-register Classes...\n"
-         << "  static const TargetRegisterClass* const "
-         << Name << "SubRegClasses[] = {\n    ";
+        OS << "  // " << Name
+           << " Sub-register Classes...\n"
+           << "  static const TargetRegisterClass* const "
+           << Name << "SubRegClasses[] = {\n    ";
 
-      bool Empty = true;
+        for (unsigned idx = 0; idx != NumSubRegIndices; ++idx) {
+          if (idx)
+            OS << ", ";
+          if (SRC[idx])
+            OS << "&" << getQualifiedName(SRC[idx]) << "RegClass";
+          else
+            OS << "0";
+        }
+        OS << "\n  };\n\n";
+      }
 
-      for (unsigned subrc = 0, subrcMax = RC.SubRegClasses.size();
-            subrc != subrcMax; ++subrc) {
-        unsigned rc2 = 0, e2 = RegisterClasses.size();
-        for (; rc2 != e2; ++rc2) {
-          const CodeGenRegisterClass &RC2 =  RegisterClasses[rc2];
-          if (RC.SubRegClasses[subrc]->getName() == RC2.getName()) {
+      // Emit the super-register classes for each RegisterClass
+      for (unsigned rc = 0, e = RegisterClasses.size(); rc != e; ++rc) {
+        const CodeGenRegisterClass &RC = RegisterClasses[rc];
+
+        // Give the register class a legal C name if it's anonymous.
+        std::string Name = RC.TheDef->getName();
+
+        OS << "  // " << Name
+           << " Super-register Classes...\n"
+           << "  static const TargetRegisterClass* const "
+           << Name << "SuperRegClasses[] = {\n    ";
+
+        bool Empty = true;
+        std::map<unsigned, std::set<unsigned> >::iterator I =
+          SuperRegClassMap.find(rc);
+        if (I != SuperRegClassMap.end()) {
+          for (std::set<unsigned>::iterator II = I->second.begin(),
+                 EE = I->second.end(); II != EE; ++II) {
+            const CodeGenRegisterClass &RC2 = RegisterClasses[*II];
             if (!Empty)
               OS << ", ";
             OS << "&" << getQualifiedName(RC2.TheDef) << "RegClass";
             Empty = false;
-
-            std::map<unsigned, std::set<unsigned> >::iterator SCMI =
-              SuperRegClassMap.find(rc2);
-            if (SCMI == SuperRegClassMap.end()) {
-              SuperRegClassMap.insert(std::make_pair(rc2,
-                                                     std::set<unsigned>()));
-              SCMI = SuperRegClassMap.find(rc2);
-            }
-            SCMI->second.insert(rc);
-            break;
           }
         }
-        if (rc2 == e2)
-          throw "Register Class member '" +
-            RC.SubRegClasses[subrc]->getName() +
-            "' is not a valid RegisterClass!";
-      }
-
-      OS << (!Empty ? ", " : "") << "NULL";
-      OS << "\n  };\n\n";
-    }
-
-    // Emit the super-register classes for each RegisterClass
-    for (unsigned rc = 0, e = RegisterClasses.size(); rc != e; ++rc) {
-      const CodeGenRegisterClass &RC = RegisterClasses[rc];
-
-      // Give the register class a legal C name if it's anonymous.
-      std::string Name = RC.TheDef->getName();
 
-      OS << "  // " << Name
-         << " Super-register Classes...\n"
-         << "  static const TargetRegisterClass* const "
-         << Name << "SuperRegClasses[] = {\n    ";
-
-      bool Empty = true;
-      std::map<unsigned, std::set<unsigned> >::iterator I =
-        SuperRegClassMap.find(rc);
-      if (I != SuperRegClassMap.end()) {
-        for (std::set<unsigned>::iterator II = I->second.begin(),
-               EE = I->second.end(); II != EE; ++II) {
-          const CodeGenRegisterClass &RC2 = RegisterClasses[*II];
-          if (!Empty)
-            OS << ", ";
-          OS << "&" << getQualifiedName(RC2.TheDef) << "RegClass";
-          Empty = false;
-        }
+        OS << (!Empty ? ", " : "") << "NULL";
+        OS << "\n  };\n\n";
       }
-
-      OS << (!Empty ? ", " : "") << "NULL";
-      OS << "\n  };\n\n";
+    } else {
+      // No subregindices in this target
+      OS << "  static const TargetRegisterClass* const "
+         << "NullRegClasses[] = { NULL };\n\n";
     }
 
     // Emit the sub-classes array for each RegisterClass
@@ -413,8 +514,10 @@ void RegisterInfoEmitter::run(raw_ostream &OS) {
          << RC.getName() + "VTs" << ", "
          << RC.getName() + "Subclasses" << ", "
          << RC.getName() + "Superclasses" << ", "
-         << RC.getName() + "SubRegClasses" << ", "
-         << RC.getName() + "SuperRegClasses" << ", "
+         << (NumSubRegIndices ? RC.getName() + "Sub" : std::string("Null"))
+         << "RegClasses, "
+         << (NumSubRegIndices ? RC.getName() + "Super" : std::string("Null"))
+         << "RegClasses, "
          << RC.SpillSize/8 << ", "
          << RC.SpillAlignment/8 << ", "
          << RC.CopyCost << ", "
@@ -436,7 +539,6 @@ void RegisterInfoEmitter::run(raw_ostream &OS) {
   std::map<Record*, std::set<Record*>, LessRecord> RegisterSubRegs;
   std::map<Record*, std::set<Record*>, LessRecord> RegisterSuperRegs;
   std::map<Record*, std::set<Record*>, LessRecord> RegisterAliases;
-  std::map<Record*, std::vector<std::pair<int, Record*> > > SubRegVectors;
   typedef std::map<Record*, std::vector<int64_t>, LessRecord> DwarfRegNumsMapTy;
   DwarfRegNumsMapTy DwarfRegNums;
   
@@ -556,83 +658,6 @@ void RegisterInfoEmitter::run(raw_ostream &OS) {
   delete [] SubregHashTable;
 
 
-  // Print the SuperregHashTable, a simple quadratically probed
-  // hash table for determining if a register is a super-register
-  // of another register.
-  unsigned NumSupRegs = 0;
-  RegNo.clear();
-  for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
-    RegNo[Regs[i].TheDef] = i;
-    NumSupRegs += RegisterSuperRegs[Regs[i].TheDef].size();
-  }
-  
-  unsigned SuperregHashTableSize = 2 * NextPowerOf2(2 * NumSupRegs);
-  unsigned* SuperregHashTable = new unsigned[2 * SuperregHashTableSize];
-  std::fill(SuperregHashTable, SuperregHashTable + 2 * SuperregHashTableSize, ~0U);
-  
-  hashMisses = 0;
-  
-  for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
-    Record* R = Regs[i].TheDef;
-    for (std::set<Record*>::iterator I = RegisterSuperRegs[R].begin(),
-         E = RegisterSuperRegs[R].end(); I != E; ++I) {
-      Record* RJ = *I;
-      // We have to increase the indices of both registers by one when
-      // computing the hash because, in the generated code, there
-      // will be an extra empty slot at register 0.
-      size_t index = ((i+1) + (RegNo[RJ]+1) * 37) & (SuperregHashTableSize-1);
-      unsigned ProbeAmt = 2;
-      while (SuperregHashTable[index*2] != ~0U &&
-             SuperregHashTable[index*2+1] != ~0U) {
-        index = (index + ProbeAmt) & (SuperregHashTableSize-1);
-        ProbeAmt += 2;
-        
-        hashMisses++;
-      }
-      
-      SuperregHashTable[index*2] = i;
-      SuperregHashTable[index*2+1] = RegNo[RJ];
-    }
-  }
-  
-  OS << "\n\n  // Number of hash collisions: " << hashMisses << "\n";
-  
-  if (SuperregHashTableSize) {
-    std::string Namespace = Regs[0].TheDef->getValueAsString("Namespace");
-    
-    OS << "  const unsigned SuperregHashTable[] = { ";
-    for (unsigned i = 0; i < SuperregHashTableSize - 1; ++i) {
-      if (i != 0)
-        // Insert spaces for nice formatting.
-        OS << "                                       ";
-      
-      if (SuperregHashTable[2*i] != ~0U) {
-        OS << getQualifiedName(Regs[SuperregHashTable[2*i]].TheDef) << ", "
-           << getQualifiedName(Regs[SuperregHashTable[2*i+1]].TheDef) << ", \n";
-      } else {
-        OS << Namespace << "::NoRegister, " << Namespace << "::NoRegister, \n";
-      }
-    }
-    
-    unsigned Idx = SuperregHashTableSize*2-2;
-    if (SuperregHashTable[Idx] != ~0U) {
-      OS << "                                       "
-         << getQualifiedName(Regs[SuperregHashTable[Idx]].TheDef) << ", "
-         << getQualifiedName(Regs[SuperregHashTable[Idx+1]].TheDef) << " };\n";
-    } else {
-      OS << Namespace << "::NoRegister, " << Namespace << "::NoRegister };\n";
-    }
-    
-    OS << "  const unsigned SuperregHashTableSize = "
-       << SuperregHashTableSize << ";\n";
-  } else {
-    OS << "  const unsigned SuperregHashTable[] = { ~0U, ~0U };\n"
-       << "  const unsigned SuperregHashTableSize = 1;\n";
-  }
-  
-  delete [] SuperregHashTable;
-
-
   // Print the AliasHashTable, a simple quadratically probed
   // hash table for determining if a register aliases another register.
   unsigned NumAliases = 0;
@@ -717,6 +742,8 @@ void RegisterInfoEmitter::run(raw_ostream &OS) {
   // to memory.
   for (std::map<Record*, std::set<Record*>, LessRecord >::iterator
          I = RegisterAliases.begin(), E = RegisterAliases.end(); I != E; ++I) {
+    if (I->second.empty())
+      continue;
     OS << "  const unsigned " << I->first->getName() << "_AliasSet[] = { ";
     for (std::set<Record*>::iterator ASI = I->second.begin(),
            E = I->second.end(); ASI != E; ++ASI)
@@ -733,6 +760,8 @@ void RegisterInfoEmitter::run(raw_ostream &OS) {
   // sub-registers list to memory.
   for (std::map<Record*, std::set<Record*>, LessRecord>::iterator
          I = RegisterSubRegs.begin(), E = RegisterSubRegs.end(); I != E; ++I) {
+   if (I->second.empty())
+     continue;
     OS << "  const unsigned " << I->first->getName() << "_SubRegsSet[] = { ";
     std::vector<Record*> SubRegsVector;
     for (std::set<Record*>::iterator ASI = I->second.begin(),
@@ -754,6 +783,8 @@ void RegisterInfoEmitter::run(raw_ostream &OS) {
   // super-registers list to memory.
   for (std::map<Record*, std::set<Record*>, LessRecord >::iterator
          I = RegisterSuperRegs.begin(), E = RegisterSuperRegs.end(); I != E; ++I) {
+    if (I->second.empty())
+      continue;
     OS << "  const unsigned " << I->first->getName() << "_SuperRegsSet[] = { ";
 
     std::vector<Record*> SuperRegsVector;
@@ -772,78 +803,77 @@ void RegisterInfoEmitter::run(raw_ostream &OS) {
 
   // Now that register alias and sub-registers sets have been emitted, emit the
   // register descriptors now.
-  const std::vector<CodeGenRegister> &Registers = Target.getRegisters();
-  for (unsigned i = 0, e = Registers.size(); i != e; ++i) {
-    const CodeGenRegister &Reg = Registers[i];
+  for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
+    const CodeGenRegister &Reg = Regs[i];
     OS << "    { \"";
     OS << Reg.getName() << "\",\t";
-    if (RegisterAliases.count(Reg.TheDef))
+    if (!RegisterAliases[Reg.TheDef].empty())
       OS << Reg.getName() << "_AliasSet,\t";
     else
       OS << "Empty_AliasSet,\t";
-    if (RegisterSubRegs.count(Reg.TheDef))
+    if (!RegisterSubRegs[Reg.TheDef].empty())
       OS << Reg.getName() << "_SubRegsSet,\t";
     else
       OS << "Empty_SubRegsSet,\t";
-    if (RegisterSuperRegs.count(Reg.TheDef))
+    if (!RegisterSuperRegs[Reg.TheDef].empty())
       OS << Reg.getName() << "_SuperRegsSet },\n";
     else
       OS << "Empty_SuperRegsSet },\n";
   }
   OS << "  };\n";      // End of register descriptors...
+
+  // Emit SubRegIndex names, skipping 0
+  const std::vector<Record*> SubRegIndices = Target.getSubRegIndices();
+  OS << "\n  const char *const SubRegIndexTable[] = { \"";
+  for (unsigned i = 0, e = SubRegIndices.size(); i != e; ++i) {
+    OS << SubRegIndices[i]->getName();
+    if (i+1 != e)
+      OS << "\", \"";
+  }
+  OS << "\" };\n\n";
   OS << "}\n\n";       // End of anonymous namespace...
 
   std::string ClassName = Target.getName() + "GenRegisterInfo";
 
   // Calculate the mapping of subregister+index pairs to physical registers.
-  std::vector<Record*> SubRegs = Records.getAllDerivedDefinitions("SubRegSet");
-  for (unsigned i = 0, e = SubRegs.size(); i != e; ++i) {
-    int subRegIndex = SubRegs[i]->getValueAsInt("index");
-    std::vector<Record*> From = SubRegs[i]->getValueAsListOfDefs("From");
-    std::vector<Record*> To   = SubRegs[i]->getValueAsListOfDefs("To");
-    
-    if (From.size() != To.size()) {
-      errs() << "Error: register list and sub-register list not of equal length"
-             << " in SubRegSet\n";
-      exit(1);
-    }
-    
-    // For each entry in from/to vectors, insert the to register at index 
-    for (unsigned ii = 0, ee = From.size(); ii != ee; ++ii)
-      SubRegVectors[From[ii]].push_back(std::make_pair(subRegIndex, To[ii]));
-  }
-  
+  AllSubRegMap AllSRM;
+
   // Emit the subregister + index mapping function based on the information
   // calculated above.
-  OS << "unsigned " << ClassName 
+  OS << "unsigned " << ClassName
      << "::getSubReg(unsigned RegNo, unsigned Index) const {\n"
      << "  switch (RegNo) {\n"
      << "  default:\n    return 0;\n";
-  for (std::map<Record*, std::vector<std::pair<int, Record*> > >::iterator 
-        I = SubRegVectors.begin(), E = SubRegVectors.end(); I != E; ++I) {
-    OS << "  case " << getQualifiedName(I->first) << ":\n";
+  for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
+    SubRegMap &SRM = inferSubRegIndices(Regs[i].TheDef, AllSRM);
+    if (SRM.empty())
+      continue;
+    OS << "  case " << getQualifiedName(Regs[i].TheDef) << ":\n";
     OS << "    switch (Index) {\n";
     OS << "    default: return 0;\n";
-    for (unsigned i = 0, e = I->second.size(); i != e; ++i)
-      OS << "    case " << (I->second)[i].first << ": return "
-         << getQualifiedName((I->second)[i].second) << ";\n";
+    for (SubRegMap::const_iterator ii = SRM.begin(), ie = SRM.end(); ii != ie;
+         ++ii)
+      OS << "    case " << getQualifiedName(ii->first)
+         << ": return " << getQualifiedName(ii->second) << ";\n";
     OS << "    };\n" << "    break;\n";
   }
   OS << "  };\n";
   OS << "  return 0;\n";
   OS << "}\n\n";
 
-  OS << "unsigned " << ClassName 
+  OS << "unsigned " << ClassName
      << "::getSubRegIndex(unsigned RegNo, unsigned SubRegNo) const {\n"
      << "  switch (RegNo) {\n"
      << "  default:\n    return 0;\n";
-  for (std::map<Record*, std::vector<std::pair<int, Record*> > >::iterator 
-        I = SubRegVectors.begin(), E = SubRegVectors.end(); I != E; ++I) {
-    OS << "  case " << getQualifiedName(I->first) << ":\n";
-    for (unsigned i = 0, e = I->second.size(); i != e; ++i)
-      OS << "    if (SubRegNo == "
-         << getQualifiedName((I->second)[i].second)
-         << ")  return " << (I->second)[i].first << ";\n";
+   for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
+     SubRegMap &SRM = AllSRM[Regs[i].TheDef];
+     if (SRM.empty())
+       continue;
+    OS << "  case " << getQualifiedName(Regs[i].TheDef) << ":\n";
+    for (SubRegMap::const_iterator ii = SRM.begin(), ie = SRM.end(); ii != ie;
+         ++ii)
+      OS << "    if (SubRegNo == " << getQualifiedName(ii->second)
+         << ")  return " << getQualifiedName(ii->first) << ";\n";
     OS << "    return 0;\n";
   }
   OS << "  };\n";
@@ -853,11 +883,11 @@ void RegisterInfoEmitter::run(raw_ostream &OS) {
   // Emit the constructor of the class...
   OS << ClassName << "::" << ClassName
      << "(int CallFrameSetupOpcode, int CallFrameDestroyOpcode)\n"
-     << "  : TargetRegisterInfo(RegisterDescriptors, " << Registers.size()+1
-     << ", RegisterClasses, RegisterClasses+" << RegisterClasses.size() <<",\n "
+     << "  : TargetRegisterInfo(RegisterDescriptors, " << Regs.size()+1
+     << ", RegisterClasses, RegisterClasses+" << RegisterClasses.size() <<",\n"
+     << "                 SubRegIndexTable,\n"
      << "                 CallFrameSetupOpcode, CallFrameDestroyOpcode,\n"
      << "                 SubregHashTable, SubregHashTableSize,\n"
-     << "                 SuperregHashTable, SuperregHashTableSize,\n"
      << "                 AliasesHashTable, AliasesHashTableSize) {\n"
      << "}\n\n";
 
@@ -865,8 +895,8 @@ void RegisterInfoEmitter::run(raw_ostream &OS) {
 
   // First, just pull all provided information to the map
   unsigned maxLength = 0;
-  for (unsigned i = 0, e = Registers.size(); i != e; ++i) {
-    Record *Reg = Registers[i].TheDef;
+  for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
+    Record *Reg = Regs[i].TheDef;
     std::vector<int64_t> RegNums = Reg->getValueAsListOfInts("DwarfNumbers");
     maxLength = std::max((size_t)maxLength, RegNums.size());
     if (DwarfRegNums.count(Reg))
diff --git a/utils/TableGen/TableGen.cpp b/utils/TableGen/TableGen.cpp
index 1c66399..17435f6 100644
--- a/utils/TableGen/TableGen.cpp
+++ b/utils/TableGen/TableGen.cpp
@@ -18,6 +18,7 @@
 #include "AsmMatcherEmitter.h"
 #include "AsmWriterEmitter.h"
 #include "CallingConvEmitter.h"
+#include "ClangASTNodesEmitter.h"
 #include "ClangDiagnosticsEmitter.h"
 #include "CodeEmitterGen.h"
 #include "DAGISelEmitter.h"
@@ -53,6 +54,7 @@ enum ActionType {
   GenCallingConv,
   GenClangDiagsDefs,
   GenClangDiagGroups,
+  GenClangStmtNodes,
   GenDAGISel,
   GenFastISel,
   GenOptParserDefs, GenOptParserImpl,
@@ -109,6 +111,8 @@ namespace {
                                "Generate Clang diagnostics definitions"),
                     clEnumValN(GenClangDiagGroups, "gen-clang-diag-groups",
                                "Generate Clang diagnostic groups"),
+                    clEnumValN(GenClangStmtNodes, "gen-clang-stmt-nodes",
+                               "Generate Clang AST statement nodes"),
                     clEnumValN(GenLLVMCConf, "gen-llvmc",
                                "Generate LLVMC configuration library"),
                     clEnumValN(GenEDHeader, "gen-enhanced-disassembly-header",
@@ -133,7 +137,7 @@ namespace {
   cl::list<std::string>
   IncludeDirs("I", cl::desc("Directory of include files"),
               cl::value_desc("directory"), cl::Prefix);
-  
+
   cl::opt<std::string>
   ClangComponent("clang-component",
                  cl::desc("Only use warnings from specified component"),
@@ -160,18 +164,18 @@ static bool ParseFile(const std::string &Filename,
   std::string ErrorStr;
   MemoryBuffer *F = MemoryBuffer::getFileOrSTDIN(Filename.c_str(), &ErrorStr);
   if (F == 0) {
-    errs() << "Could not open input file '" << Filename << "': " 
+    errs() << "Could not open input file '" << Filename << "': "
            << ErrorStr <<"\n";
     return true;
   }
-  
+
   // Tell SrcMgr about this buffer, which is what TGParser will pick up.
   SrcMgr.AddNewSourceBuffer(F, SMLoc());
 
   // Record the location of the include directory so that the lexer can find
   // it later.
   SrcMgr.setIncludeDirs(IncludeDirs);
-  
+
   TGParser Parser(SrcMgr);
 
   return Parser.ParseFile();
@@ -182,7 +186,7 @@ int main(int argc, char **argv) {
   PrettyStackTraceProgram X(argc, argv);
   cl::ParseCommandLineOptions(argc, argv);
 
-  
+
   // Parse the input file.
   if (ParseFile(InputFilename, IncludeDirs, SrcMgr))
     return 1;
@@ -193,7 +197,7 @@ int main(int argc, char **argv) {
     Out = new raw_fd_ostream(OutputFilename.c_str(), Error);
 
     if (!Error.empty()) {
-      errs() << argv[0] << ": error opening " << OutputFilename 
+      errs() << argv[0] << ": error opening " << OutputFilename
              << ":" << Error << "\n";
       return 1;
     }
@@ -244,6 +248,9 @@ int main(int argc, char **argv) {
     case GenClangDiagGroups:
       ClangDiagGroupsEmitter(Records).run(*Out);
       break;
+    case GenClangStmtNodes:
+      ClangStmtNodesEmitter(Records).run(*Out);
+      break;
     case GenDisassembler:
       DisassemblerEmitter(Records).run(*Out);
       break;
@@ -289,15 +296,15 @@ int main(int argc, char **argv) {
       assert(1 && "Invalid Action");
       return 1;
     }
-    
+
     if (Out != &outs())
       delete Out;                               // Close the file
     return 0;
-    
+
   } catch (const TGError &Error) {
     errs() << argv[0] << ": error:\n";
     PrintError(Error.getLoc(), Error.getMessage());
-    
+
   } catch (const std::string &Error) {
     errs() << argv[0] << ": " << Error << "\n";
   } catch (const char *Error) {
@@ -305,7 +312,7 @@ int main(int argc, char **argv) {
   } catch (...) {
     errs() << argv[0] << ": Unknown unexpected exception occurred.\n";
   }
-  
+
   if (Out != &outs()) {
     delete Out;                             // Close the file
     std::remove(OutputFilename.c_str());    // Remove the file, it's broken
diff --git a/utils/TableGen/X86RecognizableInstr.cpp b/utils/TableGen/X86RecognizableInstr.cpp
index b15db2f..b7085ae 100644
--- a/utils/TableGen/X86RecognizableInstr.cpp
+++ b/utils/TableGen/X86RecognizableInstr.cpp
@@ -230,6 +230,10 @@ void RecognizableInstr::processInstr(DisassemblerTables &tables,
                                    const CodeGenInstruction &insn,
                                    InstrUID uid)
 {
+  // Ignore "asm parser only" instructions.
+  if (insn.TheDef->getValueAsBit("isAsmParserOnly"))
+    return;
+  
   RecognizableInstr recogInstr(tables, insn, uid);
   
   recogInstr.emitInstructionSpecifier(tables);
@@ -835,8 +839,7 @@ OperandType RecognizableInstr::typeFromString(const std::string &s,
   TYPE("opaque512mem",        TYPE_M512)
   TYPE("SEGMENT_REG",         TYPE_SEGMENTREG)
   TYPE("DEBUG_REG",           TYPE_DEBUGREG)
-  TYPE("CONTROL_REG_32",      TYPE_CR32)
-  TYPE("CONTROL_REG_64",      TYPE_CR64)
+  TYPE("CONTROL_REG",         TYPE_CONTROLREG)
   TYPE("offset8",             TYPE_MOFFS8)
   TYPE("offset16",            TYPE_MOFFS16)
   TYPE("offset32",            TYPE_MOFFS32)
@@ -895,8 +898,7 @@ OperandEncoding RecognizableInstr::roRegisterEncodingFromString
   ENCODING("VR64",            ENCODING_REG)
   ENCODING("SEGMENT_REG",     ENCODING_REG)
   ENCODING("DEBUG_REG",       ENCODING_REG)
-  ENCODING("CONTROL_REG_32",  ENCODING_REG)
-  ENCODING("CONTROL_REG_64",  ENCODING_REG)
+  ENCODING("CONTROL_REG",     ENCODING_REG)
   errs() << "Unhandled reg/opcode register encoding " << s << "\n";
   llvm_unreachable("Unhandled reg/opcode register encoding");
 }
diff --git a/utils/buildit/GNUmakefile b/utils/buildit/GNUmakefile
index e0568d2..0f3b7eb 100644
--- a/utils/buildit/GNUmakefile
+++ b/utils/buildit/GNUmakefile
@@ -49,6 +49,9 @@ endif
 # Default to not install libLTO.dylib.
 INSTALL_LIBLTO := no
 
+# Default to do a native build, not a cross-build for an ARM host.
+ARM_HOSTED_BUILD := no
+
 ifndef RC_ProjectSourceVersion
 RC_ProjectSourceVersion = 9999
 endif
@@ -63,8 +66,12 @@ install: $(OBJROOT) $(SYMROOT) $(DSTROOT)
 	  $(SRC)/utils/buildit/build_llvm "$(RC_ARCHS)" "$(TARGETS)" \
 	    $(SRC) $(PREFIX) $(DSTROOT) $(SYMROOT) \
 	    $(ENABLE_ASSERTIONS) $(LLVM_OPTIMIZED) $(INSTALL_LIBLTO) \
+	    $(ARM_HOSTED_BUILD) \
 	    $(RC_ProjectSourceVersion) $(RC_ProjectSourceSubversion) 
 
+EmbeddedHosted:
+	$(MAKE) ARM_HOSTED_BUILD=yes install
+
 # installhdrs does nothing, because the headers aren't useful until
 # the compiler is installed.
 installhdrs:
@@ -113,4 +120,4 @@ clean:
 $(OBJROOT) $(SYMROOT) $(DSTROOT):
 	mkdir -p $@
 
-.PHONY: install installsrc clean
+.PHONY: install installsrc clean EmbeddedHosted
diff --git a/utils/buildit/build_llvm b/utils/buildit/build_llvm
index 78cc655..a9a11d9 100755
--- a/utils/buildit/build_llvm
+++ b/utils/buildit/build_llvm
@@ -46,11 +46,14 @@ LLVM_OPTIMIZED="$8"
 # should be installed.
 INSTALL_LIBLTO="$9"
 
-# The tenth parameter is the version number of the submission, e.g. 1007.
-LLVM_SUBMIT_VERSION="${10}"
+# A yes/no parameter that controls whether to cross-build for an ARM host.
+ARM_HOSTED_BUILD="${10}"
 
-# The eleventh parameter is the subversion number of the submission, e.g. 03.
-LLVM_SUBMIT_SUBVERSION="${11}"
+# The version number of the submission, e.g. 1007.
+LLVM_SUBMIT_VERSION="${11}"
+
+# The subversion number of the submission, e.g. 03.
+LLVM_SUBMIT_SUBVERSION="${12}"
 
 # The current working directory is where the build will happen. It may already
 # contain a partial result of an interrupted build, in which case this script
@@ -61,7 +64,7 @@ DARWIN_VERS=`uname -r | sed 's/\..*//'`
 echo DARWIN_VERS = $DARWIN_VERS
 
 DEVELOPER_DIR="${DEVELOPER_DIR-Developer}"
-if [ "x$RC_ProjectName" = "xllvmCore_EmbeddedHosted" ]; then
+if [ "$ARM_HOSTED_BUILD" = yes ]; then
     DT_HOME="$DEST_DIR/usr"
     HOST_SDKROOT=$SDKROOT
 else
@@ -89,8 +92,7 @@ sed -e '/[Aa]pple-style/d' -e '/include.*GNUmakefile/d' $ORIG_SRC_DIR/Makefile >
 mkdir -p $DIR/obj-llvm || exit 1
 cd $DIR/obj-llvm || exit 1
 
-
-if [ "x$RC_ProjectName" = "xllvmCore_EmbeddedHosted" ]; then
+if [ "$ARM_HOSTED_BUILD" = yes ]; then
   # The cross-tools' build process expects to find an existing cross toolchain
   # under names like 'arm-apple-darwin$DARWIN_VERS-as'; so make them.
   rm -rf $DIR/bin || exit 1
@@ -129,28 +131,21 @@ elif [ $DARWIN_VERS -gt 9 ]; then
   unset XTMPCC savedPATH
 fi
 
-
-if [ "x$RC_ProjectName" = "xllvmCore_EmbeddedHosted" ]; then
-  if [ \! -f Makefile.config ]; then
-      $SRC_DIR/configure --prefix=$DT_HOME \
-          --enable-targets=arm \
-          --host=arm-apple-darwin10 \
-          --target=arm-apple-darwin10 \
-          --build=i686-apple-darwin10 \
-          --enable-assertions=$LLVM_ASSERTIONS \
-          --enable-optimized=$LLVM_OPTIMIZED \
-          --disable-bindings \
-          || exit 1
-  fi
+if [ "$ARM_HOSTED_BUILD" = yes ]; then
+  configure_prefix=$DT_HOME
+  configure_opts="--enable-targets=arm --host=arm-apple-darwin10 \
+                  --target=arm-apple-darwin10 --build=i686-apple-darwin10"
 else
-  if [ \! -f Makefile.config ]; then
-      $SRC_DIR/configure --prefix=$DT_HOME/local \
-          --enable-targets=arm,x86,powerpc,cbe \
-          --enable-assertions=$LLVM_ASSERTIONS \
-          --enable-optimized=$LLVM_OPTIMIZED \
-          --disable-bindings \
-          || exit 1
-  fi
+  configure_prefix=$DT_HOME/local
+  configure_opts="--enable-targets=arm,x86,powerpc,cbe"
+fi
+
+if [ \! -f Makefile.config ]; then
+  $SRC_DIR/configure --prefix=$configure_prefix $configure_opts \
+    --enable-assertions=$LLVM_ASSERTIONS \
+    --enable-optimized=$LLVM_OPTIMIZED \
+    --disable-bindings \
+    || exit 1
 fi
 
 SUBVERSION=`echo $RC_ProjectSourceVersion | sed -e 's/[^.]*\.\([0-9]*\).*/\1/'`
diff --git a/utils/lit/lit/LitConfig.py b/utils/lit/lit/LitConfig.py
index 9b62470..ac48591 100644
--- a/utils/lit/lit/LitConfig.py
+++ b/utils/lit/lit/LitConfig.py
@@ -71,7 +71,7 @@ class LitConfig:
         self.bashPath = Util.which('bash', os.pathsep.join(self.path))
         if self.bashPath is None:
             # Check some known paths.
-            for path in ('/bin/bash', '/usr/bin/bash'):
+            for path in ('/bin/bash', '/usr/bin/bash', '/usr/local/bin/bash'):
                 if os.path.exists(path):
                     self.bashPath = path
                     break
diff --git a/utils/lit/lit/ShUtil.py b/utils/lit/lit/ShUtil.py
index c8f9332..dda622a 100644
--- a/utils/lit/lit/ShUtil.py
+++ b/utils/lit/lit/ShUtil.py
@@ -67,6 +67,9 @@ class ShLexer:
             elif c == '"':
                 self.eat()
                 str += self.lex_arg_quoted('"')
+            elif c == "'":
+                self.eat()
+                str += self.lex_arg_quoted("'")
             elif not self.win32Escapes and c == '\\':
                 # Outside of a string, '\\' escapes everything.
                 self.eat()
@@ -287,6 +290,10 @@ class TestShParse(unittest.TestCase):
                          Pipeline([Command(['echo', 'hello'], [])], False))
         self.assertEqual(self.parse('echo ""'),
                          Pipeline([Command(['echo', ''], [])], False))
+        self.assertEqual(self.parse("""echo -DFOO='a'"""),
+                         Pipeline([Command(['echo', '-DFOO=a'], [])], False))
+        self.assertEqual(self.parse('echo -DFOO="a"'),
+                         Pipeline([Command(['echo', '-DFOO=a'], [])], False))
 
     def test_redirection(self):
         self.assertEqual(self.parse('echo hello > c'),
diff --git a/utils/lit/lit/TestFormats.py b/utils/lit/lit/TestFormats.py
index 5e1a811..e52d0e4 100644
--- a/utils/lit/lit/TestFormats.py
+++ b/utils/lit/lit/TestFormats.py
@@ -183,8 +183,10 @@ class OneCommandPerFileTest:
             self.createTempInput(tmp, test)
             tmp.flush()
             cmd.append(tmp.name)
-        else:
+        elif hasattr(test, 'source_path'):
             cmd.append(test.source_path)
+        else:
+            cmd.append(test.getSourcePath())
 
         out, err, exitCode = TestRunner.executeCommand(cmd)
 
diff --git a/utils/lit/lit/lit.py b/utils/lit/lit/lit.py
index a29fa42..db0653f 100755
--- a/utils/lit/lit/lit.py
+++ b/utils/lit/lit/lit.py
@@ -258,9 +258,10 @@ def getTestsInSuite(ts, path_in_suite, litConfig,
     lc = getLocalConfig(ts, path_in_suite, litConfig, localConfigCache)
 
     # Search for tests.
-    for res in lc.test_format.getTestsInDirectory(ts, path_in_suite,
-                                                  litConfig, lc):
-        yield res
+    if lc.test_format is not None:
+        for res in lc.test_format.getTestsInDirectory(ts, path_in_suite,
+                                                      litConfig, lc):
+            yield res
 
     # Search subdirectories.
     for filename in os.listdir(source_path):
@@ -489,11 +490,27 @@ def main():
                                     isWindows = (platform.system()=='Windows'),
                                     params = userParams)
 
+    # Expand '@...' form in inputs.
+    actual_inputs = []
+    for input in inputs:
+        if os.path.exists(input) or not input.startswith('@'):
+            actual_inputs.append(input)
+        else:
+            f = open(input[1:])
+            try:
+                for ln in f:
+                    ln = ln.strip()
+                    if ln:
+                        actual_inputs.append(ln)
+            finally:
+                f.close()
+                    
+            
     # Load the tests from the inputs.
     tests = []
     testSuiteCache = {}
     localConfigCache = {}
-    for input in inputs:
+    for input in actual_inputs:
         prev = len(tests)
         tests.extend(getTests(input, litConfig,
                               testSuiteCache, localConfigCache)[1])
diff --git a/utils/valgrind/i386-pc-linux-gnu.supp b/utils/valgrind/i386-pc-linux-gnu.supp
new file mode 100644
index 0000000..f8fd99a
--- /dev/null
+++ b/utils/valgrind/i386-pc-linux-gnu.supp
@@ -0,0 +1,7 @@
+{
+   False leak under RegisterPass
+   Memcheck:Leak
+   ...
+   fun:_ZN83_GLOBAL_*PassRegistrar12RegisterPassERKN4llvm8PassInfoE
+   fun:_ZN4llvm8PassInfo12registerPassEv
+}
diff --git a/utils/valgrind/x86_64-pc-linux-gnu.supp b/utils/valgrind/x86_64-pc-linux-gnu.supp
new file mode 100644
index 0000000..f8fd99a
--- /dev/null
+++ b/utils/valgrind/x86_64-pc-linux-gnu.supp
@@ -0,0 +1,7 @@
+{
+   False leak under RegisterPass
+   Memcheck:Leak
+   ...
+   fun:_ZN83_GLOBAL_*PassRegistrar12RegisterPassERKN4llvm8PassInfoE
+   fun:_ZN4llvm8PassInfo12registerPassEv
+}